mirror of https://github.com/yt-dlp/yt-dlp
Merge remote-tracking branch 'origin/master' into yt-live-from-start-range
commit
444e02ef3b
@ -0,0 +1,318 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
)
|
||||
|
||||
|
||||
class BrainPOPBaseIE(InfoExtractor):
    """Shared URL matching, format assembly and login logic for the BrainPOP sites.

    Subclasses provide ``_ORIGIN`` (used to build ``_VALID_URL`` and sent as the
    login ``Referer``) as well as ``_VIDEO_URL`` and ``_HLS_URL``, which the
    format helpers below read from ``self``.
    """
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    # Maps the numeric ``status_code`` of the login response to a readable message
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        """Build the URL pattern from ``_ORIGIN``.

        The scheme is relaxed to ``https?`` and a ``www.`` prefix is made
        optional. The ``slug`` group spans three path segments and the ``id``
        group is the last of them.
        """
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields=None):
        """Return the HLS formats plus one direct format for a single media slug.

        @param slug          path joined onto ``_HLS_URL`` / ``_VIDEO_URL``
        @param format_id     format_id of the direct format; the HLS formats
                             use ``<format_id>-hls`` as their m3u8_id
        @param display_id    id used for the manifest download messages
        @param token         appended verbatim as the query string of both URLs
        @param extra_fields  optional dict merged into every returned format
        """
        # fatal=False: a failed manifest download still leaves the direct format
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        if extra_fields:
            for f in formats:
                f.update(extra_fields)
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields=None):
        """Collect formats for the 'high' and 'low' entries found in ``data``.

        For each of the plain and the ``ad_``-prefixed (audio description)
        variants, the keys ``key_format % 'high'`` and ``key_format % 'low'``
        are looked up in ``data``; every non-empty value is passed to
        ``_assemble_formats`` as the slug.

        @param data          mapping that is queried with the computed keys
        @param token         passed through to ``_assemble_formats``
        @param display_id    passed through to ``_assemble_formats``
        @param key_format    %-template applied to 'high' / 'low'
        @param extra_fields  optional dict merged into every returned format
        """
        formats = []
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2
            }
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            # enumerate() index doubles as the quality penalty: 'high' -> -1, 'low' -> -2
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if data.get(full_key_index):
                    formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                        'quality': -1 - key_quality,
                        **additional_key_fields,
                        **(extra_fields or {})
                    }))
        return formats

    def _perform_login(self, username, password):
        """POST the credentials to the login API and warn if login did not succeed.

        Any ``status_code`` other than 1505 is reported through
        ``report_warning``; nothing is raised for a rejected login.
        """
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res['status_code'])
        if status_code != 1505:
            # Resolve the reason before formatting it: an f-string is always truthy,
            # so an `or` fallback placed after the formatted message can never apply
            reason = (
                self._LOGIN_ERRORS.get(status_code)
                or login_res.get('message')
                or f'Got status code {status_code}')
            self.report_warning(f'Unable to login: {reason}')
|
||||
|
||||
|
||||
class BrainPOPIE(BrainPOPBaseIE):
    """Extractor for www.brainpop.com movies, backed by the api.brainpop.com content API."""
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        """Fetch movie and topic JSON for the URL's slug and build the info dict."""
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        # The topic request is optional; fall back to the topic embedded in the movie data
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            # The reason may be absent; default to a generic message so that the
            # substring test below cannot fail on None
            reason = traverse_obj(
                movie_data, ('access', 'reason'), expected_type=str) or 'Access to this video was denied'
            if 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(reason, video_id=display_id)
        movie_feature = movie_data['feature']
        movie_feature_data = movie_feature['data']

        # Main-language formats are preferred over the localized ones added below
        formats = self._extract_adaptive_formats(
            movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
                'language': movie_feature.get('language') or 'en',
                'language_preference': 10
            })
        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                'language': lang,
                'language_preference': -10
            }))

        # TODO: Do localization fields also have subtitles?
        subtitles = {}
        for name, sub_path in movie_feature_data.items():
            # Keys of the form `subtitles_<lang>` hold a subtitle path relative to the CDN
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and sub_path:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, sub_path)
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    """Base for the BrainPOP sites whose topic data is embedded in the page's JavaScript."""

    def _parse_js_topic_data(self, topic_data, display_id, token):
        """Turn a topic object taken from the page into an info dict.

        The formats are built from ``topic_data['movies']`` using ``token``.
        """
        # TODO: Are there non-burned subtitles?
        adaptive_formats = self._extract_adaptive_formats(topic_data['movies'], token, display_id)

        info = {'id': topic_data['EntryID'], 'display_id': display_id}
        info['title'] = topic_data.get('name')
        info['alt_title'] = topic_data.get('title')
        info['description'] = topic_data.get('synopsis')
        info['formats'] = adaptive_formats
        return info

    def _real_extract(self, url):
        """Download the page, read its ``var content = …`` JSON and the ``ec_token``."""
        _, display_id = self._match_valid_url(url).group('slug', 'id')
        webpage = self._download_webpage(url, display_id)

        content = self._search_json(
            r'var\s+content\s*=\s*', webpage, 'content data',
            display_id, end_pattern=';')
        topic = content['category']['unit']['topic']

        video_token = self._search_regex(
            r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
        return self._parse_js_topic_data(topic, display_id, video_token)
|
||||
|
||||
|
||||
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    """Extractor for jr.brainpop.com; all extraction logic is inherited."""
    # Site origin and the media hosts read by the inherited helpers
    _ORIGIN = 'https://jr.brainpop.com'
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
    _HLS_URL = 'https://hls-jr.brainpop.com'
    _CDN_URL = 'https://cdn-jr.brainpop.com'

    _TESTS = [
        {
            'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
            'md5': '04e0561bb21770f305a0ce6cf0d869ab',
            'info_dict': {
                'id': '347',
                'ext': 'mp4',
                'title': 'Emotions',
                'display_id': 'emotions',
            },
        },
        {
            'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
            'md5': 'b0ed063bbd1910df00220ee29340f5d6',
            'info_dict': {
                'id': '29',
                'ext': 'mp4',
                'title': 'Arctic Habitats',
                'display_id': 'arctichabitats',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    """Extractor for ell.brainpop.com; all extraction logic is inherited."""
    # Site origin and the media hosts read by the inherited helpers
    # (note: the media hosts use the `esl` suffix, not `ell`)
    _ORIGIN = 'https://ell.brainpop.com'
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
    _HLS_URL = 'https://hls-esl.brainpop.com'
    _CDN_URL = 'https://cdn-esl.brainpop.com'

    _TESTS = [
        {
            'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
            'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
            'info_dict': {
                'id': '1',
                'ext': 'mp4',
                'title': 'Lesson 1',
                'display_id': 'lesson1',
                'alt_title': 'Personal Pronouns',
            },
        },
        {
            'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
            'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
            'info_dict': {
                'id': '101',
                'ext': 'mp4',
                'title': 'Lesson 5',
                'display_id': 'lesson5',
                'alt_title': 'Review: Unit 6',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    """Extractor for esp.brainpop.com; all extraction logic is inherited."""
    IE_DESC = 'BrainPOP Español'

    # Site origin and the media hosts read by the inherited helpers
    _ORIGIN = 'https://esp.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/mx'

    _TESTS = [
        {
            'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
            'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
            'info_dict': {
                'id': '3893',
                'ext': 'mp4',
                'title': 'Ecosistemas',
                'display_id': 'ecosistemas',
                'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
            },
        },
        {
            'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
            'md5': '98c1b9559e0e33777209c425cda7dac4',
            'info_dict': {
                'id': '7146',
                'ext': 'mp4',
                'title': 'Emily Dickinson',
                'display_id': 'emily_dickinson',
                'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    """Extractor for fr.brainpop.com; all extraction logic is inherited."""
    IE_DESC = 'BrainPOP Français'

    # Site origin and the media hosts read by the inherited helpers
    _ORIGIN = 'https://fr.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/fr'

    _TESTS = [
        {
            'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
            'md5': '97e7f48af8af93f8a2be11709f239371',
            'info_dict': {
                'id': '1651',
                'ext': 'mp4',
                'title': 'Sources d\'énergie',
                'display_id': 'sourcesdenergie',
                'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
            },
        },
        {
            'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
            'md5': '0cf2b4f89804d0dd4a360a51310d445a',
            'info_dict': {
                'id': '5803',
                'ext': 'mp4',
                'title': 'Plagiat',
                'display_id': 'plagiat',
                'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    """Extractor for il.brainpop.com; all extraction logic is inherited."""
    IE_DESC = 'BrainPOP Hebrew'

    # Site origin and the media hosts read by the inherited helpers
    _ORIGIN = 'https://il.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/he'

    _TESTS = [
        {
            'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
            'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
            'info_dict': {
                'id': '3782',
                'ext': 'mp4',
                'title': 'md5:e993632fcda0545d9205602ec314ad67',
                'display_id': 'subjects_3782',
                'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
            },
        },
    ]
|
@ -1,117 +1,185 @@
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
try_get,
|
||||
dict_get,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
    """Extractor for bravotv.com and oxygen.com video pages.

    Page data is read either from the ``tve-video-deck-app`` element or, when
    that element is absent, from the ``ls_playlist`` entry of the Drupal
    settings JSON. The stream itself is fetched from link.theplatform.com.
    """
    _VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'info_dict': {
            'id': '3923059',
            'ext': 'mp4',
            'title': 'The Top Chef Season 16 Winner Is...',
            'description': 'Find out who takes the title of Top Chef!',
            'upload_date': '20190314',
            'timestamp': 1552591860,
            'season_number': 16,
            'episode_number': 15,
            'series': 'Top Chef',
            'episode': 'The Top Chef Season 16 Winner Is...',
            'duration': 190.357,
            'season': 'Season 16',
            'thumbnail': r're:^https://.+\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
        'info_dict': {
            'id': '9000234570',
            'ext': 'mp4',
            'title': 'London Calling',
            'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
            'upload_date': '20230310',
            'timestamp': 1678410000,
            'season_number': 20,
            'episode_number': 1,
            'series': 'Top Chef',
            'episode': 'London Calling',
            'duration': 3266.03,
            'season': 'Season 20',
            'chapters': 'count:7',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'This video requires AdobePass MSO credentials',
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
        'info_dict': {
            'id': '3692045',
            'ext': 'mp4',
            'title': 'Closing Night',
            'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
            'upload_date': '20180401',
            'timestamp': 1522623600,
            'season_number': 1,
            'episode_number': 1,
            'series': 'In Ice Cold Blood',
            'episode': 'Closing Night',
            'duration': 2629.051,
            'season': 'Season 1',
            'chapters': 'count:6',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'This video requires AdobePass MSO credentials',
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
        'info_dict': {
            'id': '3974019',
            'ext': 'mp4',
            'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
            'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
            'upload_date': '20190617',
            'timestamp': 1560790800,
            'season_number': 2,
            'episode_number': 16,
            'series': 'In Ice Cold Blood',
            'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
            'duration': 68.235,
            'season': 'Season 2',
            'thumbnail': r're:^https://.+\.jpg',
            'age_limit': 14,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the media ids from the page, then extract HLS formats and metadata."""
        site, display_id = self._match_valid_url(url).group('site', 'id')
        webpage = self._download_webpage(url, display_id)
        settings = self._search_json(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
        # Empty dict when the page has no `tve-video-deck-app` element
        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
        query = {
            'manifest': 'm3u',
            'formats': 'm3u,mpeg4',
        }

        if tve:
            account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
            account_id = tve['data-mpx-media-account-id']
            metadata = self._parse_json(
                tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
            video_id = tve.get('data-guid') or metadata['guid']
            if tve.get('data-entitlement') == 'auth':
                # Entitled content: obtain an MVPD auth token and request the secure HLS service
                auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
                site = remove_end(site, 'tv')
                release_pid = tve['data-release-pid']
                resource = self._get_mvpd_resource(
                    tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
                    tve['data-title'], release_pid, tve.get('data-rating'))
                query.update({
                    'switch': 'HLSServiceSecure',
                    'auth': self._extract_mvpd_auth(
                        url, release_pid, auth.get('adobePassRequestorId') or site, resource),
                })

        else:
            # No player element: use the first dict found under `ls_playlist`
            ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
            account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
            account_id = ls_playlist['mpxMediaAccountId']
            video_id = ls_playlist['defaultGuid']
            metadata = traverse_obj(
                ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)

        tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
        tp_metadata = self._download_json(
            update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)

        # Time values are divided by 1000 to obtain seconds
        seconds_or_none = lambda x: float_or_none(x, 1000)
        chapters = traverse_obj(tp_metadata, ('chapters', ..., {
            'start_time': ('startTime', {seconds_or_none}),
            'end_time': ('endTime', {seconds_or_none}),
        }))
        # prune pointless single chapters that span the entire duration from short videos
        if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
            chapters = None

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            update_url_query(f'{tp_url}/stream.m3u8', query), video_id, 'mp4', m3u8_id='hls')

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'chapters': chapters,
            # Preview metadata is listed first; the page metadata is merged in after it
            **merge_dicts(traverse_obj(tp_metadata, {
                'title': 'title',
                'description': 'description',
                'duration': ('duration', {seconds_or_none}),
                'timestamp': ('pubDate', {seconds_or_none}),
                'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
                'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
                'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
                'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
                'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
            }, get_all=False), traverse_obj(metadata, {
                'title': 'title',
                'description': 'description',
                'duration': ('durationInSeconds', {int_or_none}),
                'timestamp': ('airDate', {unified_timestamp}),
                'thumbnail': ('thumbnailUrl', {url_or_none}),
                'season_number': ('seasonNumber', {int_or_none}),
                'episode_number': ('episodeNumber', {int_or_none}),
                'episode': 'episodeTitle',
                'series': 'show',
            }))
        }
|
||||
|
@ -0,0 +1,61 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ClipchampIE(InfoExtractor):
    """Extractor for clipchamp.com/watch/<id> pages whose clip is stored on Cloudflare Stream."""
    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
        'info_dict': {
            'id': 'gRXZ4ZhdDaU',
            'ext': 'mp4',
            'title': 'Untitled video',
            'uploader': 'Alexander Schwartz',
            'timestamp': 1680805580,
            'upload_date': '20230406',
            'thumbnail': r're:^https?://.+\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Filled with (subdomain, path, manifest extension)
    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}

    def _real_extract(self, url):
        """Read the clip record from the page's Next.js data and collect DASH and HLS formats.

        Raises ExtractorError when the clip's storage location is not ``cf_stream``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']

        storage_location = video.get('storage_location')
        if storage_location != 'cf_stream':
            raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')

        stream_path = video['download_url']
        player_page = self._download_webpage(
            f'https://iframe.cloudflarestream.com/{stream_path}', video_id, 'Downloading player iframe')
        # Fall back to a fixed customer prefix when the iframe does not expose one
        subdomain = self._search_regex(
            r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', player_page,
            'subdomain', fatal=False)
        if not subdomain:
            subdomain = 'customer-2ut9yn3y6fta1yxe'

        dash_manifest = self._STREAM_URL_TMPL % (subdomain, stream_path, 'mpd')
        formats = self._extract_mpd_formats(
            dash_manifest, video_id, query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
        hls_manifest = self._STREAM_URL_TMPL % (subdomain, stream_path, 'm3u8')
        formats.extend(self._extract_m3u8_formats(
            hls_manifest, video_id, 'mp4', query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))

        # First and last name joined by a space; None when neither is a string
        creator_names = traverse_obj(video, ('creator', ('first_name', 'last_name'), {str}))
        info = {
            'id': video_id,
            'formats': formats,
            'uploader': ' '.join(creator_names) or None,
        }
        info.update(traverse_obj(video, {
            'title': ('project', 'project_name', {str}),
            'timestamp': ('created_at', {unified_timestamp}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
        }))
        return info
|
@ -0,0 +1,192 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DLFBaseIE(InfoExtractor):
    """Common pieces for the deutschlandfunk.de extractors: URL prefix, button regex and parser."""
    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
    # Captures the whole opening <button> tag of a play ("Anhören") button
    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'

    def _parse_button_attrs(self, button, audio_id=None):
        """Build an info dict from the attributes of a play button tag.

        @param button    HTML of the opening <button> tag
        @param audio_id  id to use; defaults to the tag's ``data-audio-diraid``
        """
        attrs = extract_attributes(button)
        if not audio_id:
            audio_id = attrs['data-audio-diraid']

        # First of these attributes that holds a valid URL
        audio_url = traverse_obj(
            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
            'data-audio-src', expected_type=url_or_none)
        ext = determine_ext(audio_url)

        metadata = traverse_obj(attrs, {
            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
            'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
            'thumbnail': ('data-audioimage', {url_or_none}),
            'uploader': 'data-audio-producer',
            'series': 'data-audio-series',
            'channel': 'data-audio-origin-site-name',
            'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
        }, get_all=False)

        # HLS manifests are expanded; anything else becomes a single audio-only format
        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(audio_url, audio_id, fatal=False)
        else:
            formats = [{'url': audio_url, 'ext': ext, 'vcodec': 'none'}]

        return {
            'id': audio_id,
            # Entries are always attributed to DLFIE, whichever subclass parsed them
            'extractor_key': DLFIE.ie_key(),
            'extractor': DLFIE.IE_NAME,
            **metadata,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class DLFIE(DLFBaseIE):
    """Extractor for a single deutschlandfunk.de audio page (``…-dlf-<8 hex digits>-100.html``)."""
    IE_NAME = 'dlf'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
    _TESTS = [
        # Audio as an HLS stream
        {
            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
            'info_dict': {
                'id': '03a3eb19',
                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
                'ext': 'm4a',
                'duration': 3298,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'On Stage',
                'channel': 'deutschlandfunk'
            },
            'params': {
                'skip_download': 'm3u8'
            },
            'skip': 'This webpage no longer exists'
        },
        {
            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
            'info_dict': {
                'id': 'd9cc1856',
                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
                'ext': 'mp3',
                'duration': 291,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'Kommentare und Themen der Woche',
                'channel': 'deutschlandfunk'
            }
        },
    ]

    def _real_extract(self, url):
        """Parse the first play button found on the page, keeping the id from the URL."""
        audio_id = self._match_id(url)
        webpage = self._download_webpage(url, audio_id)

        button = self._search_regex(self._BUTTON_REGEX, webpage, 'button')
        return self._parse_button_attrs(button, audio_id)
|
||||
|
||||
|
||||
class DLFCorpusIE(DLFBaseIE):
|
||||
IE_NAME = 'dlf:corpus'
|
||||
IE_DESC = 'DLF Multi-feed Archives'
|
||||
_VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
|
||||
_TESTS = [
|
||||
# Recorded news broadcast with referrals to related broadcasts
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
|
||||
'info_dict': {
|
||||
'id': 'fechten-russland-belarus-ukraine-protest-100',
|
||||
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||
'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '1fc5d64a',
|
||||
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||
'ext': 'mp3',
|
||||
'duration': 252,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '2ada145f',
|
||||
'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
|
||||
'ext': 'mp3',
|
||||
'duration': 336,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Deutschlandfunk Nova',
|
||||
'channel': 'deutschlandfunk-nova'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '5e55e8c9',
|
||||
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||
'ext': 'mp3',
|
||||
'duration': 187,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '47e1a096',
|
||||
'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
|
||||
'ext': 'mp3',
|
||||
'duration': 602,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '5e55e8c9',
|
||||
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||
'ext': 'mp3',
|
||||
'duration': 187,
|
||||
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||
'uploader': 'Deutschlandfunk',
|
||||
'series': 'Sport am Samstag',
|
||||
'channel': 'deutschlandfunk'
|
||||
}
|
||||
}]
|
||||
},
|
||||
# Podcast feed with tag buttons, playlist count fluctuates
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
|
||||
'info_dict': {
|
||||
'id': 'kommentare-und-themen-der-woche-100',
|
||||
'title': 'Meinung - Kommentare und Themen der Woche',
|
||||
'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
# Podcast feed with no description
|
||||
{
|
||||
'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
|
||||
'info_dict': {
|
||||
'id': 'podcast-tolle-idee-100',
|
||||
'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
    """Download the page at *url* and return it as a playlist info dict.

    Entries are produced by mapping `_parse_button_attrs` over every
    `_BUTTON_REGEX` match found in the page.
    """
    playlist_id = self._match_id(url)
    webpage = self._download_webpage(url, playlist_id)

    description = self._html_search_meta(
        ['description', 'og:description', 'twitter:description'], webpage, default=None)
    title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
    button_matches = re.findall(self._BUTTON_REGEX, webpage)

    return {
        '_type': 'playlist',
        'id': playlist_id,
        'description': description,
        'title': title,
        # map() keeps the per-match parsing lazy
        'entries': map(self._parse_button_attrs, button_matches),
    }
|
@ -0,0 +1,254 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
class GlobalPlayerBaseIE(InfoExtractor):
    # Helpers shared by the globalplayer.com extractors below

    def _get_page_props(self, url, video_id):
        """Download *url* and return `props.pageProps` from its Next.js data."""
        nextjs_data = self._search_nextjs_data(
            self._download_webpage(url, video_id), video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request *url* and detect the file extension from the response handle."""
        # Server rejects HEAD requests
        urlh = self._request_webpage(url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(urlh)

    def _extract_audio(self, episode, series):
        """Build an audio-only info dict from *episode* and its parent *series*."""
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', {clean_html}),
            'duration': ('duration', {parse_duration}),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
            'title': 'title',
        }, get_all=False)

        # Episode fields are merged last, so they win over series fields
        # that share a key (e.g. 'thumbnail')
        return {
            'vcodec': 'none',
            **series_fields,
            **episode_fields,
        }
|
||||
|
||||
|
||||
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            'live_status': 'is_live',
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'live_status': 'is_live',
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            'live_status': 'is_live',
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return a live, audio-only info dict for the station page at *url*."""
        video_id = self._match_id(url)
        station = self._get_page_props(url, video_id)['station']
        stream_url = station['streamUrl']

        info = {
            'id': station['id'],
            # Prefer "<brandSlug>-<slug>"; fall back to the legacy prefix
            'display_id': (
                join_nonempty('brandSlug', 'slug', from_dict=station)
                or station.get('legacyStationPrefix')),
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        info.update(traverse_obj(station, {
            'title': (('name', 'brandName'), {str_or_none}),
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        }, get_all=False))
        return info
|
||||
|
||||
|
||||
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            'live_status': 'is_live',
            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:^Classic FM Hall of Fame.+$',
        },
    }]

    def _real_extract(self, url):
        """Return a live, audio-only info dict built from the page's `playlistData`."""
        video_id = self._match_id(url)
        playlist_data = self._get_page_props(url, video_id)['playlistData']
        stream_url = playlist_data['streamUrl']

        info = {
            'id': video_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        info.update(traverse_obj(playlist_data, {
            'title': 'title',
            'description': 'description',
            'thumbnail': 'image',
        }))
        return info
|
||||
|
||||
|
||||
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the episodes listed on a podcast or catchup page."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The 'podcast' group only matches for /podcasts/ URLs
        series = props['podcastInfo' if podcast else 'catchupInfo']

        # Keep only episodes that have both an ID and a stream URL
        episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
        entries = []
        for ep in episodes:
            entries.append(self._extract_audio(ep, series))

        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
            'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
            **traverse_obj(series, {
                'description': 'description',
                'thumbnail': 'imageUrl',
                'title': 'title',
                'uploader': 'itunesAuthor',  # podcasts only
            }),
        }
|
||||
|
||||
|
||||
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a single podcast or catchup episode page."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        episode = props['podcastEpisode' if podcast else 'catchupEpisode']

        # The parent series is looked up under 'podcast', then 'show';
        # an empty dict is used when neither holds a dict
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
        return self._extract_audio(episode, series)
|
||||
|
||||
|
||||
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict built from the page's `videoData` props."""
        video_id = self._match_id(url)
        video_data = self._get_page_props(url, video_id)['videoData']

        info = {'id': video_id}
        info.update(traverse_obj(video_data, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', {unified_strdate}),
            'description': 'description',
        }))
        return info
|
@ -0,0 +1,83 @@
|
||||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GMANetworkVideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
    _TESTS = [{
        'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
        'info_dict': {
            'id': '28BqW0AXPe0',
            'ext': 'mp4',
            'upload_date': '20220919',
            'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'like_count': int,
            'view_count': int,
            'uploader': 'YoüLOL',
            'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'duration': 5313,
            'comment_count': int,
            'tags': 'count:22',
            'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
            'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
            'release_timestamp': 1663594212,
            'age_limit': 0,
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'description': 'md5:811bdcea74f9c48051824e494756e926',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'YoüLOL',
            'availability': 'public',
            'release_date': '20220919',
        },
    }, {
        'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
        'info_dict': {
            'id': 'yiDOExw2aSA',
            'ext': 'mp4',
            'live_status': 'not_live',
            'channel': 'GMANetwork',
            'like_count': int,
            'channel_follower_count': int,
            'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
            'duration': 1419,
            'age_limit': 0,
            'comment_count': int,
            'upload_date': '20181003',
            'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
            'title': 'More Than Words: Full Episode 80 (Finale)',
            'uploader_id': 'GMANETWORK',
            'categories': ['Entertainment'],
            'uploader': 'GMANetwork',
            'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
            'tags': 'count:29',
            'view_count': int,
            'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
        },
    }]

    def _real_extract(self, url):
        """Delegate to the YouTube or Dailymotion extractor for the embedded video.

        The page's `YOUTUBE_VIDEO` variable is tried first; otherwise the
        content API found via `NETWORK_URL` is queried.
        """
        content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        webpage = self._download_webpage(url, display_id)

        # webpage route
        youtube_id = self._search_regex(
            r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<yt_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
        if youtube_id:
            return self.url_result(youtube_id, YoutubeIE, youtube_id)

        # api call route
        # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
        network_url = self._search_regex(
            r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
        json_data = self._download_json(f'{network_url}api/data/content/video/{content_id}', display_id)

        youtube_file = json_data.get('video_file')
        if youtube_file:
            return self.url_result(youtube_file, YoutubeIE, youtube_file)

        dailymotion_file = json_data['dailymotion_file']
        return self.url_result(dailymotion_file, DailymotionIE, dailymotion_file)
|
@ -1,37 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HentaiStigmaIE(InfoExtractor):
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
        'info_dict': {
            'id': 'inyouchuu-etsu-bonus',
            'ext': 'mp4',
            'title': 'Inyouchuu Etsu Bonus',
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Resolve the direct video URL through the page's iframe wrapper."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
            webpage, 'title')

        # The media URL is in the page loaded by the iframe, not the post itself
        wrap_url = self._html_search_regex(
            r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
        wrap_webpage = self._download_webpage(wrap_url, video_id)
        video_url = self._html_search_regex(
            r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')

        info = {'id': video_id, 'url': video_url}
        info['title'] = title
        info['age_limit'] = 18
        return info
|
@ -0,0 +1,72 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
)
|
||||
|
||||
|
||||
class HollywoodReporterIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
        'info_dict': {
            'id': 'zH4jZaR5',
            'ext': 'mp4',
            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
            'upload_date': '20230312',
            'timestamp': 1678586423,
            'duration': 242.0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Hand the embedded video over to the JW Platform or YouTube extractor.

        Raises ExtractorError for any other showcase type.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        card_html = get_element_html_by_class('vlanding-video-card__link', webpage) or ''
        data = extract_attributes(card_html)
        video_id = data['data-video-showcase-trigger']
        showcase_type = data['data-video-showcase-type']

        if showcase_type == 'jwplayer':
            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
        if showcase_type == 'youtube':
            return self.url_result(video_id, 'Youtube')
        raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
|
||||
|
||||
|
||||
class HollywoodReporterPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
        'playlist_mincount': 109,
        'info_dict': {
            'id': '57822',
            'title': 'heat-vision-breakdown',
        },
    }]

    def _fetch_page(self, slug, pl_id, page):
        """Yield a url_result for each video link on one playlist page.

        *page* is zero-based; the site's page URLs start at 1.
        """
        page += 1
        page_url = f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page}/'
        webpage = self._download_webpage(
            page_url, pl_id, note=f'Downloading playlist page {page}')
        section = get_element_by_class('video-playlist-river', webpage) or ''

        video_urls = re.findall(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', section)
        yield from (self.url_result(video_url, HollywoodReporterIE) for video_url in video_urls)

    def _real_extract(self, url):
        """Return a paged playlist (15 entries per page) for the category URL."""
        slug, pl_id = self._match_valid_url(url).group('slug', 'id')
        fetch_page = functools.partial(self._fetch_page, slug, pl_id)
        return self.playlist_result(OnDemandPagedList(fetch_page, 15), pl_id, slug)
|
@ -0,0 +1,15 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HrefLiRedirectIE(InfoExtractor):
    IE_NAME = 'href.li'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://href\.li/\?(?P<url>.+)'

    _TESTS = [{
        'url': 'https://href.li/?https://www.reddit.com/r/cats/comments/12bluel/my_cat_helps_me_with_water/?utm_source=share&utm_medium=android_app&utm_name=androidcss&utm_term=1&utm_content=share_button',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Pass on the URL that follows "href.li/?" as a url_result."""
        target_url = self._match_valid_url(url).group('url')
        return self.url_result(target_url)
|
@ -0,0 +1,47 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PGATourIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
        'info_dict': {
            'id': '6322447785112',
            'ext': 'mp4',
            'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
            'uploader_id': '6116716431001',
            'upload_date': '20230312',
            'timestamp': 1678653136,
            'duration': 20.011,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:7',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
        'info_dict': {
            'id': '6322506425112',
            'ext': 'mp4',
            'title': 'Follow THE PLAYERS trophy on Championship Sunday',
            'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
            'uploader_id': '6082840763001',
            'upload_date': '20230313',
            'timestamp': 1678739835,
            'duration': 123.435,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:8',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Redirect to the Brightcove player URL for the video ID in *url*."""
        video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc')

        # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
        # IDs prefixed with "T" use a different Brightcove account and player
        if is_tourcast:
            account_id, player_id = '6116716431001', 'Vsd5Umu8r'
        else:
            account_id, player_id = '6082840763001', 'FWIBYMBPj'

        player_url = f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}'
        return self.url_result(player_url, BrightcoveNewIE)
|
@ -1,42 +1,60 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, urljoin
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
get_element_by_class,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PornezIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
    _TESTS = [{
        'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
        'info_dict': {
            'id': '344819',
            'ext': 'mp4',
            'title': 'mistresst funny_penis_names wmv',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
        'info_dict': {
            'id': '156161',
            'ext': 'mp4',
            'title': 'Watch leana lovings stiff for stepdaughter porn video.',
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the HTML5 media entry from the player iframe of a video page.

        Returns the first entry parsed from the iframe, with each format's
        height taken from its m3u8 URL, plus id, title, thumbnail and age limit.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if not video_id:
            # /watch/ URLs carry no ID; take it from the page's "?p=<id>" link
            video_id = self._search_regex(
                r'<link[^>]+\bhref=["\']https?://pornez\.net/\?p=(\w+)["\']', webpage, 'id')

        # Resolve the iframe src once, so the download and the base URL used
        # to resolve media sources inside the iframe are the same absolute URL
        iframe_url = urljoin('https://pornez.net', self._html_search_regex(
            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe'))
        iframe = self._download_webpage(iframe_url, video_id)

        entries = self._parse_html5_media_entries(iframe_url, iframe, video_id)[0]
        for fmt in entries['formats']:
            # Format URLs end in "_<height>.m3u8"
            height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height')
            fmt['format_id'] = '%sp' % height
            fmt['height'] = int_or_none(height)

        entries.update({
            'id': video_id,
            'title': (clean_html(get_element_by_class('video-title', webpage))
                      or self._html_search_meta(
                          ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
            'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
            'age_limit': 18,
        })
        return entries
|
||||
|
@ -0,0 +1,285 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor, ExtractorError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RTVCPlayBaseIE(InfoExtractor):
    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        """Return the `config` object assigned inside a <script> tag of *webpage*."""
        # Remove `" + "` sequences so concatenated JS string literals
        # become single literals before the JSON search
        joined_webpage = re.sub(r'"\s*\+\s*"', '', webpage)
        return self._search_json(
            r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', joined_webpage,
            'player_config', video_id, transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        """Return `(formats, subtitles)` collected from the config's `sources`."""
        formats, subtitles = [], {}
        # Only sources whose 'url' passes url_or_none are considered
        sources = traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url'])))
        for source in sources:
            source_url = source['url']
            ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source_url))
            if ext != 'm3u8':
                formats.append({
                    'url': source_url,
                    'ext': ext,
                })
                continue

            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                source_url, video_id, 'mp4', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
|
||||
|
||||
|
||||
class RTVCPlayIE(RTVCPlayBaseIE):
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
            'description': 'md5:eff9e548394175928059320c006031ea',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
        'info_dict': {
            'id': 'senal-colombia',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
        'info_dict': {
            'id': 'radio-nacional',
            'title': r're:^Radio Nacional',
            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
        'info_dict': {
            'id': 'senoritas',
            'title': 'Señoritas',
            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
        'md5': 'f040a7380a269ad633cf837384d5e9fc',
        'info_dict': {
            'id': 'james-regresa-clases-28022022',
            'title': 'James regresa a clases - 28/02/2022',
            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
        'info_dict': {
            'id': 'llinas-el-cerebro-y-el-universo',
            'title': 'Llinás, el cerebro y el universo',
            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
        'info_dict': {
            'id': 'profe-en-tu-casa',
            'title': 'Profe en tu casa',
            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 537,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
        'info_dict': {
            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
        'info_dict': {
            'id': 'diez-versiones',
            'title': 'Diez versiones',
            'description': 'md5:997471ed971cb3fd8e41969457675306',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Extract a single stream, or a playlist when the page has no stream URL.

        Raises ExtractorError when the page has neither a stream URL, a
        season list nor podcast episodes.
        """
        video_id, category = self._match_valid_url(url).group('id', 'category')
        webpage = self._download_webpage(url, video_id)

        state = self._search_json(
            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
            video_id, transform_source=js_to_json)
        hydration = state['content']['currentContent']

        # With an asset ID the HLS URL comes from a template; otherwise
        # fall back to the channel's HLS URL
        asset_id = traverse_obj(hydration, ('video', 'assetid'))
        if asset_id:
            hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
        else:
            hls_url = traverse_obj(hydration, ('channel', 'hls'))

        metadata = traverse_obj(hydration, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
        }, get_all=False)

        if hls_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': category == 'en-vivo',
                **metadata,
            }

        # Probably it's a program's page
        seasons = traverse_obj(
            hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
            get_all=False)
        if seasons:
            entries = [self.url_result(
                urljoin(url, episode['slug']), url_transparent=True,
                **traverse_obj(season, {
                    'season': 'title',
                    'season_number': ('season', {int_or_none}),
                }), **traverse_obj(episode, {
                    'title': 'title',
                    'thumbnail': ('image', 'cover', 'path'),
                    'episode_number': ('chapter_number', {int_or_none}),
                })) for season in seasons for episode in traverse_obj(season, ('contents', ...))]
            return self.playlist_result(entries, video_id, **metadata)

        # No seasons either: fall back to the 'audios' list
        podcast_episodes = hydration.get('audios')
        if not podcast_episodes:
            raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')

        podcast_entries = [
            self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, {
                'title': 'title',
                'description': ('description', {clean_html}),
                'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
                'season_number': ('season', {int_or_none}),
            })) for episode in podcast_episodes]
        return self.playlist_result(podcast_entries, video_id, **metadata)
|
||||
|
||||
|
||||
class RTVCPlayEmbedIE(RTVCPlayBaseIE):
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
        'info_dict': {
            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
            'title': 'Tráiler: Señoritas',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Extract formats from the embed page's player config.

        Metadata is fetched from the CMS API when the config has an asset ID.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
        if asset_id:
            # Non-fatal: a failed metadata request does not abort extraction
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)
        else:
            metadata = {}

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('image', ..., 'thumbnail', 'path'),
        }, get_all=False))
        return info
|
||||
|
||||
|
||||
# Extractor for the player pages hosted at media.rtvc.gov.co/kalturartvc/.
# Every result is reported as a live stream.
class RTVCKalturaIE(RTVCPlayBaseIE):
    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
        'info_dict': {
            'id': 'indexSC',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        # Formats are gathered from two places: the player config embedded in
        # the page, and the HLS manifest named by the CMS channel metadata.
        video_id = self._match_id(url)
        player_page = self._download_webpage(url, video_id)

        config = self._extract_player_config(player_page, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(config, video_id)

        channel_id = traverse_obj(config, ('rtvcplay', 'channelId'))
        if channel_id:
            # Non-fatal: fall back to whatever the player config provided
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)
        else:
            metadata = {}

        hls_url = traverse_obj(metadata, ('channel', 'hls'))
        hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
            hls_url, video_id, 'mp4', fatal=False)
        formats.extend(hls_formats)
        self._merge_subtitles(hls_subtitles, target=subtitles)

        meta_fields = traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('channel', 'image', 'logo', 'path'),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **meta_fields
        }
|
@ -0,0 +1,31 @@
|
||||
from .common import InfoExtractor
|
||||
from .rtvcplay import RTVCKalturaIE
|
||||
|
||||
|
||||
# Extractor for the Señal Colombia live page; it only locates the player URL
# in the page settings and hands it over to RTVCKalturaIE.
class SenalColombiaLiveIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'

    _TESTS = [{
        'url': 'https://www.senalcolombia.tv/senal-en-vivo',
        'info_dict': {
            'id': 'indexSC',
            'title': 're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        live_page = self._download_webpage(url, display_id)

        # The JSON object is searched for right after the opening <script> tag
        # whose data-drupal-selector attribute mentions "drupal-settings-json"
        settings_tag_re = r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>'
        hydration = self._search_json(settings_tag_re, live_page, 'hydration', display_id)

        # A missing 'envivosrc' key is deliberately left to raise KeyError
        player_url = hydration['envivosrc']
        return self.url_result(player_url, RTVCKalturaIE, display_id)
|
@ -0,0 +1,108 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, float_or_none, get_element_by_class, js_to_json, traverse_obj
|
||||
|
||||
|
||||
# Extractor for wevidi.net watch pages (11-character video ids).
class WeVidiIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?wevidi\.net/watch/(?P<id>[\w-]{11})'
    _TESTS = [{
        'url': 'https://wevidi.net/watch/2th7UO5F4KV',
        'md5': 'b913d1ff5bbad499e2c7ef4aa6d829d7',
        'info_dict': {
            'id': '2th7UO5F4KV',
            'ext': 'mp4',
            'title': 'YouTube Alternative: WeVidi - customizable channels & more',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:73a27d0a87d49fbcc5584566326ebeed',
            'uploader': 'eclecRC',
            'duration': 932.098,
        }
    }, {
        'url': 'https://wevidi.net/watch/ievRuuQHbPS',
        'md5': 'ce8a94989a959bff9003fa27ee572935',
        'info_dict': {
            'id': 'ievRuuQHbPS',
            'ext': 'mp4',
            'title': 'WeVidi Playlists',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee',
            'uploader': 'WeVidi',
            'duration': 36.1999,
        }
    }, {
        'url': 'https://wevidi.net/watch/PcMzDWaQSWb',
        'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec',
        'info_dict': {
            'id': 'PcMzDWaQSWb',
            'ext': 'mp4',
            'title': 'Cat blep',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f',
            'uploader': 'WeVidi',
            'duration': 41.972,
        }
    }, {
        'url': 'https://wevidi.net/watch/wJnRqDHNe_u',
        'md5': 'c8f263dd47e66cc17546b3abf47b5a77',
        'info_dict': {
            'id': 'wJnRqDHNe_u',
            'ext': 'mp4',
            'title': 'Gissy Talks: YouTube Alternatives',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:e65036f0d4af80e0af191bd11af5195e',
            'uploader': 'GissyEva',
            'duration': 630.451,
        }
    }, {
        'url': 'https://wevidi.net/watch/4m1c4yJR_yc',
        'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed',
        'info_dict': {
            'id': '4m1c4yJR_yc',
            'ext': 'mp4',
            'title': 'Enough of that! - Awesome Exilez Podcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:96af99dd63468b2dfab3020560e3e9b2',
            'uploader': 'eclecRC',
            'duration': 6.804,
        }
    }]

    def _extract_formats(self, wvplayer_props):
        # Generator: yields one format dict per usable entry of
        # wvplayer_props['resolutions'].
        #
        # Resolution code -> frame height, codes 1..6.
        # Taken from WeVidi player JS: https://wevidi.net/layouts/default/static/player.min.js
        resolution_map = dict(enumerate((144, 240, 360, 480, 720, 1080), start=1))

        vid, uid, name = (wvplayer_props[key] for key in ('srcVID', 'srcUID', 'srcNAME'))
        src_path = f'{vid}/{uid}/{name}'

        # Only int entries are kept; falsy ones become None, which presumably
        # makes traverse_obj discard them - confirm against traverse_obj docs
        resolution_codes = traverse_obj(
            wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None}))
        for res in resolution_codes:
            # ceil(res / 2) - 1, written with integer floor division only
            format_id = str(-(res // -2) - 1)
            yield {
                'acodec': 'mp4a.40.2',
                'ext': 'mp4',
                'format_id': format_id,
                'height': resolution_map.get(res),
                'url': f'https://www.wevidi.net/videoplayback/{src_path}/{format_id}',
                'vcodec': 'avc1.42E01E',
            }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def to_json(raw):
            # Every '||' in the matched source is rewritten to '}' before the
            # JS object literal is converted to JSON
            return js_to_json(raw.replace('||', '}'))

        def class_text(class_name):
            # Cleaned text of the first element carrying the given CSS class
            return clean_html(get_element_by_class(class_name, webpage))

        # Properties object passed to the WVPlayer(...) call in the page
        wvplayer_props = self._search_json(
            r'WVPlayer\(', webpage, 'player', video_id, transform_source=to_json)

        return {
            'id': video_id,
            'title': class_text('video_title'),
            'description': class_text('descr_long'),
            'uploader': class_text('username'),
            'formats': list(self._extract_formats(wvplayer_props)),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': float_or_none(wvplayer_props.get('duration')),
        }
|
@ -0,0 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for whyp.it track pages; the numeric id in the URL is the video id.
class WhypIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
        'info_dict': {
            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
            'id': '18337',
            'title': 'Home Page Example Track',
            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
            'ext': 'mp3',
            'duration': 52.82,
            'uploader': 'Brad',
            'uploader_id': '1',
            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
        },
    }, {
        'url': 'https://www.whyp.it/tracks/18337',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        unique_id = self._match_id(url)
        track_page = self._download_webpage(url, unique_id)
        # The track record is the 'rawTrack' entry of the page's Nuxt data
        track = self._search_nuxt_data(track_page, unique_id)['rawTrack']

        # The audio URL is mandatory: a missing key raises KeyError
        audio_url = track['audio_url']
        track_meta = traverse_obj(track, {
            'title': 'title',
            'description': 'description',
            'duration': ('duration', {float_or_none}),
            'uploader': ('user', 'username'),
            'uploader_id': ('user', 'id', {str_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
        })

        return {
            'url': audio_url,
            'id': unique_id,
            **track_meta,
            'ext': 'mp3',
            'vcodec': 'none',
            'http_headers': {'Referer': 'https://whyp.it/'},
        }
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue