From 351368cb9a6731b886a58f5a10fd6b302bbe47be Mon Sep 17 00:00:00 2001 From: The-MAGI <110553776+The-MAGI@users.noreply.github.com> Date: Mon, 6 May 2024 01:57:38 +0300 Subject: [PATCH] [ie/youporn] Fix extractor (#8827) Closes #7967 Authored by: The-MAGI --- yt_dlp/extractor/youporn.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 6ee0abcae..6d4e31bf3 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor): 'id': '16290308', 'age_limit': 18, 'categories': [], - 'description': 'md5:00ea70f642f431c379763c17c2f396bc', + 'description': str, # TODO: detect/remove SEO spam description in ytdl backport 'display_id': 'tinderspecial-trailer1', 'duration': 298.0, 'ext': 'mp4', 'upload_date': '20201123', 'uploader': 'Ersties', 'tags': [], - 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg', - 'timestamp': 1606089600, + 'thumbnail': r're:https://.+\.jpg', + 'timestamp': 1606147564, 'title': 'Tinder In Real Life', 'view_count': int, } @@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') - definitions = self._download_json( - f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id) + self._set_cookie('.youporn.com', 'age_verified', '1') + webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id) + definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions'] - def get_format_data(data, f): - return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl'])) + def get_format_data(data, stream_type): + info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any)) + if not info_url: + return [] + return traverse_obj( + self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False), + lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl'])) formats = [] # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s @@ -123,10 +129,6 @@ class YouPornIE(InfoExtractor): f['height'] = height formats.append(f) - webpage = self._download_webpage( - 'http://www.youporn.com/watch/%s' % video_id, display_id, - headers={'Cookie': 'age_verified=1'}) - title = self._html_search_regex( r'(?s)