[pornhub] Fix extraction (closes #12007)

9 years ago · e64b0fca14
parent 78ef214d2d
commit e64b0fca14
1 changed file with 17 additions and 4 deletions
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -156,11 +156,24 @@ class PornHubIE(InfoExtractor):
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
        video_variables = {}
        for video_variablename, quote, video_variable in re.findall(
                r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage):
            video_variables[video_variablename] = video_variable
        encoded_video_urls = []
        for encoded_video_url in re.findall(
                r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage):
            encoded_video_urls.append(encoded_video_url)
        # Decode the URLs 
        video_urls = []
-        for quote, video_url in re.findall(
+        for url in encoded_video_urls:
-                r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
+            for varname, varval in video_variables.items():
-            video_urls.append(compat_urllib_parse_unquote(re.sub(
+                url = url.replace(varname, varval)
-                r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
+            url = url.replace('+', '')
            url = url.replace(' ', '')
            video_urls.append(url)
        if webpage.find('"encrypted":true') != -1:
            password = compat_urllib_parse_unquote_plus(