refactor: combine extractors drtuber, iceporn and nuvid

pull/12631/head
JChris246 1 month ago
parent 9136c8c72e
commit 461afcce2b

@ -839,7 +839,6 @@ from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE from .hytale import HytaleIE
from .icareus import IcareusIE from .icareus import IcareusIE
from .iceporn import IcePornIE
from .ichinanalive import ( from .ichinanalive import (
IchinanaLiveClipIE, IchinanaLiveClipIE,
IchinanaLiveIE, IchinanaLiveIE,
@ -1422,7 +1421,6 @@ from .nuum import (
NuumMediaIE, NuumMediaIE,
NuumTabIE, NuumTabIE,
) )
from .nuvid import NuvidIE
from .nytimes import ( from .nytimes import (
NYTimesArticleIE, NYTimesArticleIE,
NYTimesCookingIE, NYTimesCookingIE,

@ -5,13 +5,15 @@ from ..utils import (
NO_DEFAULT, NO_DEFAULT,
int_or_none, int_or_none,
parse_duration, parse_duration,
strip_or_none,
str_to_int, str_to_int,
url_or_none,
) )
class DrTuberIE(InfoExtractor): class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?' _VALID_URL = r'https?://(?:(?:www|m)\.)?(?P<domain>drtuber|iceporn|nuvid)\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)'] _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:drtuber|iceporn|nuvid)\.com/embed/\d+)']
_TESTS = [{ _TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd', 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@ -21,14 +23,92 @@ class DrTuberIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'hot perky blonde naked golf', 'title': 'hot perky blonde naked golf',
'like_count': int, 'like_count': int,
'dislike_count': int,
'comment_count': int, 'comment_count': int,
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 'categories': ['babe', 'blonde', 'erotic', 'outdoor', 'softcore', 'solo'],
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18, 'age_limit': 18,
'duration': 304,
'description': 'Welcome to this hot porn video named Hot Perky Blonde Naked Golf. DrTuber is the best place for watching xxx movies online!'
},
}, {
'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed',
'md5': '88be0402a06e61cd1dfaea69dc8623a7',
'info_dict': {
'id': '2296835',
'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed',
'title': 'Eva Karera gets her trimmed cunt plowed',
'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835',
'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg',
'ext': 'mp4',
'duration': 2178,
'age_limit': 18,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'categories': ['Big Boobs', 'Blowjob', 'Brunette', 'Doggystyle', 'Hardcore', 'Hd', 'Lingerie', 'Masturbation', 'Milf', 'Pornstar', 'Titjob'],
},
}, {
'url': 'https://www.nuvid.com/video/6513023/italian-babe',
'md5': '772d2f8288f3d3c5c45f7a41761c7844',
'info_dict': {
'id': '6513023',
'display_id': 'italian-babe',
'description': 'Welcome to this hot Italian porn video named Italian Babe. Nuvid is the best place for watching xxx movies online!',
'ext': 'mp4',
'title': 'italian babe',
'duration': 321.0,
'age_limit': 18,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': r're:https?://.+\.jpg',
'categories': ['Amateur', 'BBW', 'Brunette', 'Fingering', 'Italian', 'Softcore', 'Solo', 'Webcam'],
},
}, {
'url': 'https://m.nuvid.com/video/6523263',
'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
'info_dict': {
'id': '6523263',
'display_id': '6523263',
'ext': 'mp4',
'title': 'Slut brunette college student anal dorm',
'description': 'Welcome to this hot Brunette porn video named Slut Brunette College Student Anal Dorm. Nuvid is the best place for watching xxx movies online!',
'duration': 421.0,
'age_limit': 18,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': r're:https?://.+\.jpg',
'thumbnails': list,
'categories': list,
},
}, {
'url': 'http://m.nuvid.com/video/6415801/',
'md5': '638d5ececb138d5753593f751ae3f697',
'info_dict': {
'id': '6415801',
'display_id': '6415801',
'ext': 'mp4',
'title': 'My best friend wanted to fuck my wife for a long time',
'description': 'Welcome to this hot Redhead porn video named My Best Friend Wanted To Fuck My Wife For A Long Time. Nuvid is the best place for watching xxx movies online!',
'duration': 1882,
'age_limit': 18,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': r're:https?://.+\.jpg',
'categories': list,
}, },
}, { }, {
'url': 'http://www.drtuber.com/embed/489939', 'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.iceporn.com/video/2296835',
'only_matching': True,
}, {
'url': 'https://www.nuvid.com/video/6513023',
'only_matching': True,
}, { }, {
'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
'only_matching': True, 'only_matching': True,
@ -36,69 +116,108 @@ class DrTuberIE(InfoExtractor):
def _real_extract(self, url):
    """Extract formats and metadata for a drtuber/iceporn/nuvid video URL.

    The site is selected by the ``domain`` group of ``_VALID_URL``. The
    desktop video page and the site's ``player_config_json`` endpoint are
    fetched first; the mobile page is fetched non-fatally and is used for
    extra thumbnails and as a last-resort title source.

    Returns an info dict with ``id``, ``display_id``, ``formats``, ``title``,
    ``thumbnails``, the like/dislike/comment counts, ``categories``,
    ``age_limit``, ``duration`` and ``description``.
    """
    mobj = self._match_valid_url(url)
    video_id, display_id, domain = mobj.group('id', 'display_id', 'domain')
    display_id = display_id or video_id

    webpage = self._download_webpage(
        f'http://www.{domain}.com/video/{video_id}', display_id)

    video_data = self._download_json(
        f'http://www.{domain}.com/player_config_json/', video_id, query={
            'vid': video_id,
            'embed': 0,
            'aid': 0,
            'domain_id': 0,
        }, headers={
            'Accept': 'application/json',
        })

    # Known format ids mapped to their pixel height, listed worst to best so
    # that the position in the table doubles as the numeric quality rank.
    heights = {
        'lq': 360,
        'hq': 720,
        '4k': 2160,
    }
    quality_rank = {format_id: rank for rank, format_id in enumerate(heights)}

    formats = []
    # A missing/empty 'files' entry yields no formats instead of a KeyError
    for format_id, video_url in (video_data.get('files') or {}).items():
        if not video_url:
            continue
        formats.append({
            'format_id': format_id,
            # 'quality' must be a number; unknown ids get the default rank
            # and no height rather than crashing on a missing table entry
            'quality': quality_rank.get(format_id, -1),
            'height': heights.get(format_id),
            'url': video_url,
        })
    self._check_formats(formats, video_id)

    duration = int_or_none(video_data.get('duration')) or parse_duration(
        video_data.get('duration_format'))

    # default=None keeps this lookup non-fatal so the mobile-page fallback
    # below is actually reachable when the desktop page has no usable title
    title = strip_or_none(video_data.get('title')) or self._html_search_regex(
        (r'<div.*class=[\'"]caption[\'"].*?><h2>(.+?)</h2>',
         r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
         r'<title>([^<]+)\s*@\s+DrTuber',
         r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
         r'<p[^>]+class="title_substrate">([^<]+)</p>',
         r'<title>([^<]+) - \d+'),
        webpage, 'title', default=None)

    # The mobile page is needed for thumbnails in every case, so fetch it
    # exactly once; a failed download degrades to an empty page.
    mobile_webpage = self._download_webpage(
        f'http://m.{domain}.com/video/{video_id}',
        video_id, 'Downloading mobile video page', fatal=False) or ''

    if not title:
        # Last resort: this search is fatal, a video without a title is an error
        title = strip_or_none(self._html_search_regex(
            (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
             r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
             r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
            mobile_webpage, 'title', group='title'))

    thumbnails = [
        {'url': thumb_url} for thumb_url in re.findall(
            r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', mobile_webpage)
        if url_or_none(thumb_url)]
    # The poster from the player config is preferred over the page previews
    if url_or_none(video_data.get('poster')):
        thumbnails.append({'url': video_data['poster'], 'preference': 1})

    def extract_count(id_, name, default=NO_DEFAULT):
        """Return the integer inside the <span> whose class/id matches ``id_``."""
        return str_to_int(self._html_search_regex(
            rf'<span[^>]+(?:class|id)="{id_}"[^>]*>(?P<{name}>[\d,\.]+)</span>',
            webpage, f'{name} count', default=default, fatal=False, group=name))

    # NOTE(review): 'rate_votes'/'video_rate_votes' are accepted for both likes
    # and dislikes, so a page exposing only that counter reports the same
    # number for both - confirm what that counter means on each site.
    like_count = extract_count('(?:rate_likes|rate_votes|video_rate_votes)', 'like')
    dislike_count = extract_count('(?:rate_dislikes|rate_votes|video_rate_votes)', 'dislike', default=None)
    comment_count = extract_count('(?:comments_count|comments__counter)', 'comment')

    cats_str = self._search_regex(
        r'<div[^>]+class="(?:categories_list|data_categories|video-cat)">(.+?)</div>',
        webpage, 'categories', fatal=False)

    categories = None
    if cats_str:
        # Prefer the link titles; fall back to the category slug in the href
        for pattern in (r'<a[^>]+title="([^"]+)"', r'<a[^>]+href="/categories/([^"]+)"'):
            categories = re.findall(pattern, cats_str)
            if categories:
                break

    return {
        'id': video_id,
        'display_id': display_id,
        'formats': formats,
        'title': title,
        'thumbnails': thumbnails,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'comment_count': comment_count,
        'categories': categories,
        'age_limit': self._rta_search(webpage),
        'duration': duration,
        'description': self._html_search_meta('description', webpage),
    }

@ -1,50 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, parse_duration
class IcePornIE(InfoExtractor):
    """Extractor for iceporn.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?iceporn\.com/video/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.iceporn.com/video/2296835/eva-karera-gets-her-trimmed-cunt-plowed',
        'md5': '844482e1c3c45831859748550a1b8dcf',
        'info_dict': {
            'id': '2296835',
            'display_id': 'eva-karera-gets-her-trimmed-cunt-plowed',
            'title': 'Eva Karera gets her trimmed cunt plowed',
            'description': 're:Eva Karera Gets Her Trimmed Cunt Plowed - Pornstar, Milf, Blowjob, Big Boobs Porn Movies - 2296835',
            'thumbnail': 're:https?://g\\d.iceppsn.com/media/videos/tmb/\\d+/preview/\\d+.jpg',
            'ext': 'mp4',
            'duration': 2178,
        },
    }]

    def _real_extract(self, url):
        """Download the video page and player config, then build the info dict."""
        match = self._match_valid_url(url)
        video_id, display_id = match.group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)

        config_query = {
            'vid': video_id,
            'aid': 0,
            'domain_id': 0,
            'embed': 0,
            'ref': 'null',
            'check_speed': 0,
        }
        video_data = self._download_json(
            'https://www.iceporn.com/player_config_json/', video_id,
            query=config_query, headers={'Accept': 'application/json'})

        # One format per entry of 'files' that carries a non-empty URL
        formats = [
            {'url': media_url, 'format_id': file_key}
            for file_key, media_url in video_data.get('files', {}).items()
            if media_url
        ]

        # The player config title wins; the page caption is the fallback
        title = video_data.get('title')
        if not title:
            title = self._html_search_regex(
                r'<div.*class=[\'"]caption[\'"].*?><h2>(.+?)</h2>',
                webpage, 'title')

        # Numeric duration first, the formatted duration string otherwise
        duration = int_or_none(video_data.get('duration'))
        if not duration:
            duration = parse_duration(video_data.get('duration_format'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('poster'),
            'duration': duration,
            'description': self._html_search_meta('description', webpage),
        }

@ -1,99 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
strip_or_none,
traverse_obj,
url_or_none,
)
class NuvidIE(InfoExtractor):
    """Extractor for nuvid.com video pages on the ``www`` and ``m`` hosts."""

    _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.nuvid.com/video/6513023/italian-babe',
        'md5': '772d2f8288f3d3c5c45f7a41761c7844',
        'info_dict': {
            'id': '6513023',
            'ext': 'mp4',
            'title': 'italian babe',
            'duration': 321.0,
            'age_limit': 18,
            'thumbnail': r're:https?://.+\.jpg',
        },
    }, {
        'url': 'https://m.nuvid.com/video/6523263',
        'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
        'info_dict': {
            'id': '6523263',
            'ext': 'mp4',
            'title': 'Slut brunette college student anal dorm',
            'duration': 421.0,
            'age_limit': 18,
            'thumbnail': r're:https?://.+\.jpg',
            'thumbnails': list,
        },
    }, {
        'url': 'http://m.nuvid.com/video/6415801/',
        'md5': '638d5ececb138d5753593f751ae3f697',
        'info_dict': {
            'id': '6415801',
            'ext': 'mp4',
            'title': 'My best friend wanted to fuck my wife for a long time',
            'duration': 1882,
            'age_limit': 18,
            'thumbnail': r're:https?://.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the player config JSON and the mobile page.

        The mobile page download is non-fatal; it only supplies extra
        thumbnails and a fallback title when the player config has none.
        """
        video_id = self._match_id(url)

        # Display labels for the format keys the player config is known to use
        qualities = {
            'lq': '360p',
            'hq': '720p',
        }

        json_url = f'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'
        video_data = self._download_json(
            json_url, video_id, headers={
                'Accept': 'application/json, text/javascript, */*; q = 0.01',
                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            })

        webpage = self._download_webpage(
            f'http://m.nuvid.com/video/{video_id}',
            video_id, 'Downloading video page', fatal=False) or ''

        title = strip_or_none(video_data.get('title') or self._html_search_regex(
            (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
             r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
             r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
            webpage, 'title', group='title'))

        formats = []
        # A missing 'files' entry yields no formats instead of an AttributeError
        for quality, source in (video_data.get('files') or {}).items():
            if not source:
                continue
            label = qualities.get(quality)
            formats.append({
                'url': source,
                # Keys outside the table keep their raw name and get no height,
                # rather than a None format_id and a TypeError on None[:-1]
                'format_id': label or quality,
                'height': int_or_none(label[:-1]) if label else None,
            })

        self._check_formats(formats, video_id)

        duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))

        thumbnails = [
            {'url': thumb_url} for thumb_url in re.findall(
                r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', webpage)
            if url_or_none(thumb_url)]
        # The poster from the player config is preferred over the page previews
        if url_or_none(video_data.get('poster')):
            thumbnails.append({'url': video_data['poster'], 'preference': 1})

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'age_limit': 18,
        }
Loading…
Cancel
Save