# Post-patch state of yt_dlp/extractor/drtuber.py, re-formatted from a
# whitespace-collapsed diff.
#
# Companion changes carried by the same patch (no code of their own here):
#   * yt_dlp/extractor/_extractors.py: drop `from .nuvid import NuvidIE`
#   * yt_dlp/extractor/nuvid.py: deleted; its logic is merged into DrTuberIE
#
# NOTE(review): the diff this was rebuilt from had `<...>` fragments stripped
# out of several regex literals. Patterns marked NOTE(review) below were
# reconstructed and should be confirmed against the upstream patch.
import re

from .common import InfoExtractor
from ..utils import (
    NO_DEFAULT,
    float_or_none,
    int_or_none,
    make_archive_id,
    parse_duration,
    qualities,
    str_to_int,
    strip_or_none,
    url_or_none,
)


class DrTuberIE(InfoExtractor):
    """Extractor for drtuber.com, iceporn.com and nuvid.com video/embed pages."""

    # Group names follow the `mobj.group('id', 'display_id', 'domain')` call
    # in _real_extract.
    _VALID_URL = r'https?://(?:(?:www|m)\.)?(?P<domain>drtuber|iceporn|nuvid)\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
    # NOTE(review): leading `<iframe[^>` and the `url` group name were lost in
    # extraction and are reconstructed here - confirm.
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:drtuber|iceporn|nuvid)\.com/embed/\d+)']
    _TESTS = [{
        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
        # NOTE(review): 'id' and 'display_id' fell between two diff hunks and
        # are inferred from the URL - confirm.
        'info_dict': {
            'id': '1740434',
            'display_id': 'hot-perky-blonde-naked-golf',
            'ext': 'mp4',
            'title': 'hot perky blonde naked golf',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'categories': ['babe', 'blonde', 'erotic', 'outdoor', 'softcore', 'solo'],
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
            'duration': 304,
            'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!',
        },
    }, {
        'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed',
        'md5': '88be0402a06e61cd1dfaea69dc8623a7',
        'info_dict': {
            'id': '2296835',
            'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed',
            'title': 'Eva Karera gets her trimmed cunt plowed',
            'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835',
            'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg',
            'ext': 'mp4',
            'duration': 2178,
            'age_limit': 18,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'categories': ['Big Boobs', 'Blowjob', 'Brunette', 'Doggystyle', 'Hardcore', 'Hd', 'Lingerie', 'Masturbation', 'Milf', 'Pornstar', 'Titjob'],
        },
    }, {
        'url': 'https://www.nuvid.com/video/6513023/italian-babe',
        'md5': '772d2f8288f3d3c5c45f7a41761c7844',
        'info_dict': {
            'id': '6513023',
            'display_id': 'italian-babe',
            'description': 'Welcome to this hot Italian porn video named Italian Babe. Nuvid is the best place for watching xxx movies online!',
            'ext': 'mp4',
            'title': 'italian babe',
            'duration': 321.0,
            'age_limit': 18,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': r're:https?://.+\.jpg',
            'categories': ['Amateur', 'BBW', 'Brunette', 'Fingering', 'Italian', 'Softcore', 'Solo', 'Webcam'],
            '_old_archive_ids': ['nuvid 6513023'],
        },
    }, {
        'url': 'https://m.nuvid.com/video/6523263',
        'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
        'info_dict': {
            'id': '6523263',
            'display_id': '6523263',
            'ext': 'mp4',
            'title': 'Slut brunette college student anal dorm',
            'description': 'Welcome to this hot Brunette porn video named Slut Brunette College Student Anal Dorm. Nuvid is the best place for watching xxx movies online!',
            'duration': 421.0,
            'age_limit': 18,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': r're:https?://.+\.jpg',
            'thumbnails': list,
            'categories': list,
            '_old_archive_ids': ['nuvid 6523263'],
        },
    }, {
        'url': 'http://m.nuvid.com/video/6415801/',
        'md5': '638d5ececb138d5753593f751ae3f697',
        'info_dict': {
            'id': '6415801',
            'display_id': '6415801',
            'ext': 'mp4',
            'title': 'My best friend wanted to fuck my wife for a long time',
            'description': 'Welcome to this hot Redhead porn video named My Best Friend Wanted To Fuck My Wife For A Long Time. Nuvid is the best place for watching xxx movies online!',
            'duration': 1882,
            'age_limit': 18,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': r're:https?://.+\.jpg',
            'categories': list,
            '_old_archive_ids': ['nuvid 6415801'],
        },
    }, {
        'url': 'http://www.drtuber.com/embed/489939',
        'only_matching': True,
    }, {
        'url': 'https://www.iceporn.com/video/2296835',
        'only_matching': True,
    }, {
        'url': 'https://www.nuvid.com/video/6513023',
        'only_matching': True,
    }, {
        'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
        'only_matching': True,
    }]

    # Player-config format ids mapped to frame height, ordered worst to best
    # (the key order is what `qualities()` ranks by).
    _QUALITIES = {
        'lq': 360,
        'hq': 720,
        '4k': 2160,
    }

    @staticmethod
    def _split_votes(votes, percent):
        """Split a total vote count into (likes, dislikes).

        `percent` is the share of positive votes (0-100). Returns
        (None, None) when either input is missing. Dislikes are derived as
        the remainder so the two numbers always add up to `votes`.
        """
        if votes is None or percent is None:
            return None, None
        percent = min(max(percent, 0), 100)
        likes = round(votes * percent / 100)
        return likes, votes - likes

    def _search_counter(self, webpage, id_, name, default=NO_DEFAULT):
        """Return the raw numeric text of the <span> whose class/id matches `id_`.

        `id_` is interpolated into the pattern unescaped, so callers passing
        alternatives must wrap them in a (non-capturing) group themselves.
        """
        return self._html_search_regex(
            rf'<span[^>]+(?:class|id)="{id_}"[^>]*>([\d,\.]+)%?</span>',
            webpage, f'{name} count', default=default, fatal=False)

    def _real_extract(self, url):
        """Build the info dict from the desktop page, mobile page and player config JSON."""
        mobj = self._match_valid_url(url)
        video_id, display_id, domain = mobj.group('id', 'display_id', 'domain')
        display_id = display_id or video_id

        webpage = self._download_webpage(
            f'http://www.{domain}.com/video/{video_id}', display_id)

        video_data = self._download_json(
            f'http://www.{domain}.com/player_config_json/', video_id, query={
                'vid': video_id,
                'embed': 0,
                'aid': 0,
                'domain_id': 0,
            }, headers={
                'Accept': 'application/json',
            })

        quality = qualities(tuple(self._QUALITIES))
        # `.get(...) or {}` so a missing/empty 'files' entry ends in the
        # regular "no formats" handling rather than a bare KeyError.
        formats = [{
            'format_id': format_id,
            'quality': quality(format_id),
            'height': self._QUALITIES.get(format_id),
            'url': video_url,
        } for format_id, video_url in (video_data.get('files') or {}).items() if video_url]
        self._check_formats(formats, video_id)

        duration = int_or_none(video_data.get('duration')) or parse_duration(
            video_data.get('duration_format'))

        # The mobile page is optional: it supplies extra thumbnails and a
        # last-resort title.
        mobile_webpage = self._download_webpage(
            f'http://m.{domain}.com/video/{video_id}',
            video_id, 'Downloading mobile video page', fatal=False) or ''

        thumbnails = [
            {'url': thumb_url} for thumb_url in re.findall(
                r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', mobile_webpage)
            if url_or_none(thumb_url)]
        if url_or_none(video_data.get('poster')):
            thumbnails.append({'url': video_data['poster'], 'preference': 1})

        # default=None keeps the desktop search non-fatal so the mobile
        # fallback below is actually reachable.
        title = video_data.get('title') or self._html_search_regex(
            # NOTE(review): the first pattern's tags were lost in extraction;
            # only `class=['"]caption['"]` and `([^<]+)` survived - confirm.
            (r'<div[^>]*class=[\'"]caption[\'"][^>]*>\s*<h\d[^>]*>([^<]+)</h\d>',
             # NOTE(review): `<h1[^>` and `<title>` openers reconstructed.
             r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
             r'<title>([^<]+)\s*@\s+DrTuber',
             r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
             r'<p[^>]+class="title_substrate">([^<]+)</p>',
             r'<title>([^<]+) - \d+'),
            webpage, 'title', default=None)
        if not title:
            # Title is mandatory, so this last attempt is left fatal.
            title = self._html_search_regex(
                (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
                 r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
                 r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
                mobile_webpage, 'title', group='title')
        title = strip_or_none(title)

        def extract_count(id_, name, default=NO_DEFAULT):
            return str_to_int(self._search_counter(webpage, id_, name, default=default))

        # Prefer explicit like/dislike counters. Only when one is missing,
        # estimate it from the total-votes counter and the positive-vote
        # percentage.
        like_count = extract_count('rate_likes', 'like', default=None)
        dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
        if like_count is None or dislike_count is None:
            votes = extract_count('(?:rate_votes|video_rate_votes)', 'vote', default=None)
            # Parsed as float: str_to_int would drop the decimal point
            # ("85.5" -> 855).
            percent_rate = float_or_none(self._search_counter(
                webpage, '(?:rate_percent|video_rate_rate)', 'vote percent rate', default=None))
            est_likes, est_dislikes = self._split_votes(votes, percent_rate)
            if like_count is None:
                like_count = est_likes
            if dislike_count is None:
                dislike_count = est_dislikes

        cats_str = self._search_regex(
            r'<div[^>]+class="(?:categories_list|data_categories|video-cat)">(.+?)</div>',
            webpage, 'categories', fatal=False) or ''

        info = {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'thumbnails': thumbnails,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': extract_count('(?:comments_count|comments__counter)', 'comment'),
            'categories': (re.findall(r'<a[^>]+title="([^"]+)"', cats_str)
                           or re.findall(r'<a[^>]+href="/categories/([^"]+)"', cats_str)),
            'age_limit': self._rta_search(webpage),
            'duration': duration,
            'description': self._html_search_meta('description', webpage),
        }
        # Only nuvid URLs were ever archived under the removed Nuvid
        # extractor; a "nuvid <id>" alias on other domains could match an
        # unrelated archive entry.
        if domain == 'nuvid':
            info['_old_archive_ids'] = [make_archive_id('Nuvid', video_id)]
        return info