From 3660d3f33f15488bb7ba58c8d1854e2cb15eba90 Mon Sep 17 00:00:00 2001 From: JChris246 Date: Sun, 16 Mar 2025 02:40:02 -0400 Subject: [PATCH 1/5] feat: add extractor iceporn --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/iceporn.py | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 yt_dlp/extractor/iceporn.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3ab0f5efa2..7a1055adb6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -839,6 +839,7 @@ from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE +from .iceporn import IcePornIE from .ichinanalive import ( IchinanaLiveClipIE, IchinanaLiveIE, diff --git a/yt_dlp/extractor/iceporn.py b/yt_dlp/extractor/iceporn.py new file mode 100644 index 0000000000..b4d820389e --- /dev/null +++ b/yt_dlp/extractor/iceporn.py @@ -0,0 +1,50 @@ +from .common import InfoExtractor +from ..utils import int_or_none, parse_duration + + +class IcePornIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?iceporn\.com/video/(?P[0-9]+)/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed', + 'md5': '844482e1c3c45831859748550a1b8dcf', + 'info_dict': { + 'id': '2296835', + 'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed', + 'title': 'Eva Karera gets her trimmed cunt plowed', + 'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835', + 'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg', + 'ext': 'mp4', + 'duration': 2178 + } + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + + webpage = self._download_webpage(url, video_id) + video_data = self._download_json('https://www.iceporn.com/player_config_json/', video_id, query={ + 'vid': video_id, 'aid': 0, 'domain_id': 0, 'embed': 0, 'ref': 'null', 'check_speed': 0 + }, headers={ + 'Accept': 'application/json' + }) + + formats = [] + for quality_id, video_url in video_data.get('files', {}).items(): + if video_url: + formats.append({ + 'url': video_url, + 'format_id': quality_id + }) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': video_data.get('title') + or self._html_search_regex(r'

(.+?)

', + webpage, 'title'), + 'formats': formats, + 'thumbnail': video_data.get('poster'), + 'duration': int_or_none(video_data.get('duration')) + or parse_duration(video_data.get('duration_format')), + 'description': self._html_search_meta('description', webpage), + } From 9136c8c72eafaddfbe34c4176781acaa4da5547b Mon Sep 17 00:00:00 2001 From: JChris246 Date: Sun, 16 Mar 2025 03:01:19 -0400 Subject: [PATCH 2/5] fix: add trailing commas --- yt_dlp/extractor/iceporn.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/iceporn.py b/yt_dlp/extractor/iceporn.py index b4d820389e..7e71a3cec4 100644 --- a/yt_dlp/extractor/iceporn.py +++ b/yt_dlp/extractor/iceporn.py @@ -14,8 +14,8 @@ class IcePornIE(InfoExtractor): 'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835', 'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg', 'ext': 'mp4', - 'duration': 2178 - } + 'duration': 2178, + }, }] def _real_extract(self, url): @@ -23,9 +23,9 @@ class IcePornIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_data = self._download_json('https://www.iceporn.com/player_config_json/', video_id, query={ - 'vid': video_id, 'aid': 0, 'domain_id': 0, 'embed': 0, 'ref': 'null', 'check_speed': 0 + 'vid': video_id, 'aid': 0, 'domain_id': 0, 'embed': 0, 'ref': 'null', 'check_speed': 0, }, headers={ - 'Accept': 'application/json' + 'Accept': 'application/json', }) formats = [] @@ -33,7 +33,7 @@ class IcePornIE(InfoExtractor): if video_url: formats.append({ 'url': video_url, - 'format_id': quality_id + 'format_id': quality_id, }) return { From 461afcce2b18128c7be848c7e20be9ba3cd63a7c Mon Sep 17 00:00:00 2001 From: JChris246 Date: Sun, 16 Mar 2025 16:42:18 -0400 Subject: [PATCH 3/5] refactor: combine extractors drtuber, iceporn and nuvid --- yt_dlp/extractor/_extractors.py | 2 - yt_dlp/extractor/drtuber.py | 163 +++++++++++++++++++++++++++----- yt_dlp/extractor/iceporn.py | 50 ---------- yt_dlp/extractor/nuvid.py | 99 ------------------- 4 files changed, 141 insertions(+), 173 deletions(-) delete mode 100644 yt_dlp/extractor/iceporn.py delete mode 100644 yt_dlp/extractor/nuvid.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 7a1055adb6..b78de59a90 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -839,7 +839,6 @@ from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE -from .iceporn import IcePornIE from .ichinanalive import ( IchinanaLiveClipIE, IchinanaLiveIE, @@ -1422,7 +1421,6 @@ from .nuum import ( NuumMediaIE, NuumTabIE, ) -from .nuvid import NuvidIE from .nytimes import ( NYTimesArticleIE, NYTimesCookingIE, diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index 6a1fe9010b..ebf5405726 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -5,13 +5,15 @@ from ..utils import ( NO_DEFAULT, int_or_none, parse_duration, + strip_or_none, str_to_int, + url_or_none, ) class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' - _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)'] + _VALID_URL = r'https?://(?:(?:www|m)\.)?(?Pdrtuber|iceporn|nuvid)\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//(?:www\.)?(?:drtuber|iceporn|nuvid)\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', @@ -21,14 +23,92 @@ class DrTuberIE(InfoExtractor): 'ext': 'mp4', 'title': 'hot perky blonde naked golf', 'like_count': int, + 'dislike_count': int, 'comment_count': int, - 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], + 'categories': ['babe', 'blonde', 'erotic', 'outdoor', 'softcore', 'solo'], 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, + 'duration': 304, + 'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!' + }, + }, { + 'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed', + 'md5': '88be0402a06e61cd1dfaea69dc8623a7', + 'info_dict': { + 'id': '2296835', + 'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed', + 'title': 'Eva Karera gets her trimmed cunt plowed', + 'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835', + 'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg', + 'ext': 'mp4', + 'duration': 2178, + 'age_limit': 18, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'categories': ['Big Boobs', 'Blowjob', 'Brunette', 'Doggystyle', 'Hardcore', 'Hd', 'Lingerie', 'Masturbation', 'Milf', 'Pornstar', 'Titjob'], + }, + }, { + 'url': 'https://www.nuvid.com/video/6513023/italian-babe', + 'md5': '772d2f8288f3d3c5c45f7a41761c7844', + 'info_dict': { + 'id': '6513023', + 'display_id': 'italian-babe', + 'description': 'Welcome to this hot Italian porn video named Italian Babe. Nuvid is the best place for watching xxx movies online!', + 'ext': 'mp4', + 'title': 'italian babe', + 'duration': 321.0, + 'age_limit': 18, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'categories': ['Amateur', 'BBW', 'Brunette', 'Fingering', 'Italian', 'Softcore', 'Solo', 'Webcam'], + }, + }, { + 'url': 'https://m.nuvid.com/video/6523263', + 'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52', + 'info_dict': { + 'id': '6523263', + 'display_id': '6523263', + 'ext': 'mp4', + 'title': 'Slut brunette college student anal dorm', + 'description': 'Welcome to this hot Brunette porn video named Slut Brunette College Student Anal Dorm. Nuvid is the best place for watching xxx movies online!', + 'duration': 421.0, + 'age_limit': 18, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'thumbnails': list, + 'categories': list, + }, + }, { + 'url': 'http://m.nuvid.com/video/6415801/', + 'md5': '638d5ececb138d5753593f751ae3f697', + 'info_dict': { + 'id': '6415801', + 'display_id': '6415801', + 'ext': 'mp4', + 'title': 'My best friend wanted to fuck my wife for a long time', + 'description': 'Welcome to this hot Redhead porn video named My Best Friend Wanted To Fuck My Wife For A Long Time. Nuvid is the best place for watching xxx movies online!', + 'duration': 1882, + 'age_limit': 18, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'categories': list, }, }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, + }, { + 'url': 'https://www.iceporn.com/video/2296835', + 'only_matching': True, + }, { + 'url': 'https://www.nuvid.com/video/6513023', + 'only_matching': True, }, { 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', 'only_matching': True, @@ -36,69 +116,108 @@ class DrTuberIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id, display_id, domain = mobj.group('id', 'display_id', 'domain') + display_id = display_id or video_id webpage = self._download_webpage( - f'http://www.drtuber.com/video/{video_id}', display_id) + f'http://www.{domain}.com/video/{video_id}', display_id) video_data = self._download_json( - 'http://www.drtuber.com/player_config_json/', video_id, query={ + f'http://www.{domain}.com/player_config_json/', video_id, query={ 'vid': video_id, 'embed': 0, 'aid': 0, 'domain_id': 0, + }, headers={ + 'Accept': 'application/json', }) + qualities = { + 'lq': '360p', + 'hq': '720p', + '4k': '2160p', + } + formats = [] for format_id, video_url in video_data['files'].items(): if video_url: formats.append({ 'format_id': format_id, - 'quality': 2 if format_id == 'hq' else 1, + 'quality': qualities.get(format_id) or format_id, + 'height': int_or_none(qualities.get(format_id)[:-1]), 'url': video_url, }) + self._check_formats(formats, video_id) duration = int_or_none(video_data.get('duration')) or parse_duration( video_data.get('duration_format')) - title = self._html_search_regex( - (r']+class=["\']title[^>]+>([^<]+)', + title = video_data.get('title') or self._html_search_regex( + (r'

(.+?)

', + r']+class=["\']title[^>]+>([^<]+)', r'([^<]+)\s*@\s+DrTuber', r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'), webpage, 'title') - thumbnail = self._html_search_regex( - r'poster="([^"]+)"', - webpage, 'thumbnail', fatal=False) + mobile_webpage = None + if not title: + mobile_webpage = self._download_webpage( + f'http://m.{domain}.com/video/{video_id}', + video_id, 'Downloading mobile video page', fatal=False) or '' + + title = strip_or_none(video_data.get('title') or self._html_search_regex( + (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''', + r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''', + r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''), + mobile_webpage, 'title', group='title')) + + thumbnails = [] + if not mobile_webpage: + mobile_webpage = self._download_webpage( + f'http://m.{domain}.com/video/{video_id}', + video_id, 'Downloading mobile video page', fatal=False) or '' + + thumbnails = [ + {'url': thumb_url} for thumb_url in re.findall( + r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', mobile_webpage) + if url_or_none(thumb_url)] + + if url_or_none(video_data.get('poster')): + thumbnails.append({'url': video_data['poster'], 'preference': 1}) def extract_count(id_, name, default=NO_DEFAULT): return str_to_int(self._html_search_regex( - rf'<span[^>]+(?:class|id)="{id_}"[^>]*>([\d,\.]+)</span>', - webpage, f'{name} count', default=default, fatal=False)) + rf'<span[^>]+(?:class|id)="{id_}"[^>]*>(?P<{name}>[\d,\.]+)</span>', + webpage, f'{name} count', default=default, fatal=False, group=name)) - like_count = extract_count('rate_likes', 'like') - dislike_count = extract_count('rate_dislikes', 'dislike', default=None) - comment_count = extract_count('comments_count', 'comment') + like_count = extract_count('(?:rate_likes|rate_votes|video_rate_votes)', 'like') + dislike_count = extract_count('(?:rate_dislikes|rate_votes|video_rate_votes)', 'dislike', default=None) + comment_count = extract_count('(?:comments_count|comments__counter)', 'comment') cats_str = self._search_regex( - r'<div[^>]+class="categories_list">(.+?)</div>', + r'<div[^>]+class="(?:categories_list|data_categories|video-cat)">(.+?)</div>', webpage, 'categories', fatal=False) - categories = [] if not cats_str else re.findall( - r'<a title="([^"]+)"', cats_str) + + categories = None + if cats_str: + for pattern in [r'<a[^>]+title="([^"]+)"', r'<a[^>]+href="/categories/([^"]+)"']: + categories = re.findall(pattern, cats_str) + if categories: + break return { 'id': video_id, 'display_id': display_id, 'formats': formats, 'title': title, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, 'categories': categories, 'age_limit': self._rta_search(webpage), 'duration': duration, + 'description': self._html_search_meta('description', webpage), } diff --git a/yt_dlp/extractor/iceporn.py b/yt_dlp/extractor/iceporn.py deleted file mode 100644 index 7e71a3cec4..0000000000 --- a/yt_dlp/extractor/iceporn.py +++ /dev/null @@ -1,50 +0,0 @@ -from .common import InfoExtractor -from ..utils import int_or_none, parse_duration - - -class IcePornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?iceporn\.com/video/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' - _TESTS = [{ - 'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed', - 'md5': '844482e1c3c45831859748550a1b8dcf', - 'info_dict': { - 'id': '2296835', - 'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed', - 'title': 'Eva Karera gets her trimmed cunt plowed', - 'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835', - 'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg', - 'ext': 'mp4', - 'duration': 2178, - }, - }] - - def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).group('id', 'display_id') - - webpage = self._download_webpage(url, video_id) - video_data = self._download_json('https://www.iceporn.com/player_config_json/', video_id, query={ - 'vid': video_id, 'aid': 0, 'domain_id': 0, 'embed': 0, 'ref': 'null', 'check_speed': 0, - }, headers={ - 'Accept': 'application/json', - }) - - formats = [] - for quality_id, video_url in video_data.get('files', {}).items(): - if video_url: - formats.append({ - 'url': video_url, - 'format_id': quality_id, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': video_data.get('title') - or self._html_search_regex(r'<div.*class=[\'"]caption[\'"].*?><h2>(.+?)</h2>', - webpage, 'title'), - 'formats': formats, - 'thumbnail': video_data.get('poster'), - 'duration': int_or_none(video_data.get('duration')) - or parse_duration(video_data.get('duration_format')), - 'description': self._html_search_meta('description', webpage), - } diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py deleted file mode 100644 index 99a426b25f..0000000000 --- a/yt_dlp/extractor/nuvid.py +++ /dev/null @@ -1,99 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - strip_or_none, - traverse_obj, - url_or_none, -) - - -class NuvidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://www.nuvid.com/video/6513023/italian-babe', - 'md5': '772d2f8288f3d3c5c45f7a41761c7844', - 'info_dict': { - 'id': '6513023', - 'ext': 'mp4', - 'title': 'italian babe', - 'duration': 321.0, - 'age_limit': 18, - 'thumbnail': r're:https?://.+\.jpg', - }, - }, { - 'url': 'https://m.nuvid.com/video/6523263', - 'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52', - 'info_dict': { - 'id': '6523263', - 'ext': 'mp4', - 'title': 'Slut brunette college student anal dorm', - 'duration': 421.0, - 'age_limit': 18, - 'thumbnail': r're:https?://.+\.jpg', - 'thumbnails': list, - }, - }, { - 'url': 'http://m.nuvid.com/video/6415801/', - 'md5': '638d5ececb138d5753593f751ae3f697', - 'info_dict': { - 'id': '6415801', - 'ext': 'mp4', - 'title': 'My best friend wanted to fuck my wife for a long time', - 'duration': 1882, - 'age_limit': 18, - 'thumbnail': r're:https?://.+\.jpg', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - qualities = { - 'lq': '360p', - 'hq': '720p', - } - - json_url = f'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0' - video_data = self._download_json( - json_url, video_id, headers={ - 'Accept': 'application/json, text/javascript, */*; q = 0.01', - 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', - }) - - webpage = self._download_webpage( - f'http://m.nuvid.com/video/{video_id}', - video_id, 'Downloading video page', fatal=False) or '' - - title = strip_or_none(video_data.get('title') or self._html_search_regex( - (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''', - r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''', - r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''), - webpage, 'title', group='title')) - - formats = [{ - 'url': source, - 'format_id': qualities.get(quality), - 'height': int_or_none(qualities.get(quality)[:-1]), - } for quality, source in video_data.get('files').items() if source] - - self._check_formats(formats, video_id) - - duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format')) - thumbnails = [ - {'url': thumb_url} for thumb_url in re.findall( - r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', webpage) - if url_or_none(thumb_url)] - if url_or_none(video_data.get('poster')): - thumbnails.append({'url': video_data['poster'], 'preference': 1}) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'thumbnails': thumbnails, - 'duration': duration, - 'age_limit': 18, - } From b2248420be0fd9639e4c3917f39c80862680afbe Mon Sep 17 00:00:00 2001 From: JChris246 <JChris246@users.noreply.github.com> Date: Sun, 16 Mar 2025 16:47:10 -0400 Subject: [PATCH 4/5] fix: lint errors --- yt_dlp/extractor/drtuber.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index ebf5405726..1de4218edc 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -5,8 +5,8 @@ from ..utils import ( NO_DEFAULT, int_or_none, parse_duration, - strip_or_none, str_to_int, + strip_or_none, url_or_none, ) @@ -29,7 +29,7 @@ class DrTuberIE(InfoExtractor): 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, 'duration': 304, - 'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!' + 'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!', }, }, { 'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed', From 6b06ae4a0e470bf1e2e4f70278fe03e622d46d88 Mon Sep 17 00:00:00 2001 From: JChris246 <JChris246@users.noreply.github.com> Date: Sun, 16 Mar 2025 18:46:35 -0400 Subject: [PATCH 5/5] fix: address PR comments --- yt_dlp/extractor/drtuber.py | 72 ++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index 1de4218edc..0ff52387cd 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -4,7 +4,9 @@ from .common import InfoExtractor from ..utils import ( NO_DEFAULT, int_or_none, + make_archive_id, parse_duration, + qualities, str_to_int, strip_or_none, url_or_none, @@ -30,6 +32,7 @@ class DrTuberIE(InfoExtractor): 'age_limit': 18, 'duration': 304, 'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!', + '_old_archive_ids': ['nuvid 1740434'], }, }, { 'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed', @@ -47,6 +50,7 @@ class DrTuberIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'categories': ['Big Boobs', 'Blowjob', 'Brunette', 'Doggystyle', 'Hardcore', 'Hd', 'Lingerie', 'Masturbation', 'Milf', 'Pornstar', 'Titjob'], + '_old_archive_ids': ['nuvid 2296835'], }, }, { 'url': 'https://www.nuvid.com/video/6513023/italian-babe', @@ -64,6 +68,7 @@ class DrTuberIE(InfoExtractor): 'comment_count': int, 'thumbnail': r're:https?://.+\.jpg', 'categories': ['Amateur', 'BBW', 'Brunette', 'Fingering', 'Italian', 'Softcore', 'Solo', 'Webcam'], + '_old_archive_ids': ['nuvid 6513023'], }, }, { 'url': 'https://m.nuvid.com/video/6523263', @@ -82,6 +87,7 @@ class DrTuberIE(InfoExtractor): 'thumbnail': r're:https?://.+\.jpg', 'thumbnails': list, 'categories': list, + '_old_archive_ids': ['nuvid 6523263'], }, }, { 'url': 'http://m.nuvid.com/video/6415801/', @@ -99,6 +105,7 @@ class DrTuberIE(InfoExtractor): 'comment_count': int, 'thumbnail': r're:https?://.+\.jpg', 'categories': list, + '_old_archive_ids': ['nuvid 6415801'], }, }, { 'url': 'http://www.drtuber.com/embed/489939', @@ -132,19 +139,20 @@ class DrTuberIE(InfoExtractor): 'Accept': 'application/json', }) - qualities = { - 'lq': '360p', - 'hq': '720p', - '4k': '2160p', + QUALITIES = { + 'lq': 360, + 'hq': 720, + '4k': 2160, } + quality = qualities(tuple(QUALITIES)) formats = [] for format_id, video_url in video_data['files'].items(): if video_url: formats.append({ 'format_id': format_id, - 'quality': qualities.get(format_id) or format_id, - 'height': int_or_none(qualities.get(format_id)[:-1]), + 'quality': quality(format_id), + 'height': QUALITIES.get(format_id), 'url': video_url, }) self._check_formats(formats, video_id) @@ -152,8 +160,17 @@ class DrTuberIE(InfoExtractor): duration = int_or_none(video_data.get('duration')) or parse_duration( video_data.get('duration_format')) + mobile_webpage = self._download_webpage( + f'http://m.{domain}.com/video/{video_id}', + video_id, 'Downloading mobile video page', fatal=False) or '' + + thumbnails = [ + {'url': thumb_url} for thumb_url in re.findall( + r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', mobile_webpage) + if url_or_none(thumb_url)] + title = video_data.get('title') or self._html_search_regex( - (r'<div.*class=[\'"]caption[\'"].*?><h2>(.+?)</h2>', + (r'<div [^>]*class=[\'"]caption[\'"][^>]*><h2>([^<]+)</h2>', r'<h1[^>]+class=["\']title[^>]+>([^<]+)', r'<title>([^<]+)\s*@\s+DrTuber', r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', @@ -161,51 +178,32 @@ class DrTuberIE(InfoExtractor): r'<title>([^<]+) - \d+'), webpage, 'title') - mobile_webpage = None if not title: - mobile_webpage = self._download_webpage( - f'http://m.{domain}.com/video/{video_id}', - video_id, 'Downloading mobile video page', fatal=False) or '' - title = strip_or_none(video_data.get('title') or self._html_search_regex( (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''', r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''', r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''), mobile_webpage, 'title', group='title')) - thumbnails = [] - if not mobile_webpage: - mobile_webpage = self._download_webpage( - f'http://m.{domain}.com/video/{video_id}', - video_id, 'Downloading mobile video page', fatal=False) or '' - - thumbnails = [ - {'url': thumb_url} for thumb_url in re.findall( - r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', mobile_webpage) - if url_or_none(thumb_url)] - if url_or_none(video_data.get('poster')): thumbnails.append({'url': video_data['poster'], 'preference': 1}) def extract_count(id_, name, default=NO_DEFAULT): return str_to_int(self._html_search_regex( - rf'<span[^>]+(?:class|id)="{id_}"[^>]*>(?P<{name}>[\d,\.]+)</span>', - webpage, f'{name} count', default=default, fatal=False, group=name)) + rf'<span[^>]+(?:class|id)="{id_}"[^>]*>([\d,\.]+)%?</span>', + webpage, f'{name} count', default=default, fatal=False)) + percent_rate = extract_count('?:rate_percent|video_rate_rate', 'vote percent rate', default=None) like_count = extract_count('(?:rate_likes|rate_votes|video_rate_votes)', 'like') dislike_count = extract_count('(?:rate_dislikes|rate_votes|video_rate_votes)', 'dislike', default=None) - comment_count = extract_count('(?:comments_count|comments__counter)', 'comment') + + if percent_rate: + like_count = round(percent_rate * like_count / 100) + dislike_count = round(100 - percent_rate * dislike_count / 100) cats_str = self._search_regex( r'<div[^>]+class="(?:categories_list|data_categories|video-cat)">(.+?)</div>', - webpage, 'categories', fatal=False) - - categories = None - if cats_str: - for pattern in [r'<a[^>]+title="([^"]+)"', r'<a[^>]+href="/categories/([^"]+)"']: - categories = re.findall(pattern, cats_str) - if categories: - break + webpage, 'categories', fatal=False) or '' return { 'id': video_id, @@ -215,9 +213,11 @@ class DrTuberIE(InfoExtractor): 'thumbnails': thumbnails, 'like_count': like_count, 'dislike_count': dislike_count, - 'comment_count': comment_count, - 'categories': categories, + 'comment_count': extract_count('(?:comments_count|comments__counter)', 'comment'), + 'categories': (re.findall(r'<a[^>]+title="([^"]+)"', cats_str) + or re.findall(r'<a[^>]+href="/categories/([^"]+)"', cats_str)), 'age_limit': self._rta_search(webpage), 'duration': duration, 'description': self._html_search_meta('description', webpage), + '_old_archive_ids': [make_archive_id('Nuvid', video_id)], }