diff --git a/test/test_subtitles.py b/test/test_subtitles.py index efd69b33d9..53e0b4eaf8 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -14,7 +14,6 @@ from yt_dlp.extractor import ( NRKTVIE, PBSIE, CeskaTelevizeIE, - ComedyCentralIE, DailymotionIE, DemocracynowIE, LyndaIE, @@ -279,23 +278,6 @@ class TestNPOSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') -@is_download_test -@unittest.skip('IE broken') -class TestMTVSubtitles(BaseTestSubtitles): - url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' - IE = ComedyCentralIE - - def getInfoDict(self): - return super().getInfoDict()['entries'][0] - - def test_allsubtitles(self): - self.DL.params['writesubtitles'] = True - self.DL.params['allsubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), {'en'}) - self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') - - @is_download_test class TestNRKSubtitles(BaseTestSubtitles): url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index bb595f924b..504bdbed8b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -411,10 +411,7 @@ from .cnn import ( CNNIE, CNNIndonesiaIE, ) -from .comedycentral import ( - ComedyCentralIE, - ComedyCentralTVIE, -) +from .comedycentral import ComedyCentralIE from .commonmistakes import ( BlobIE, CommonMistakesIE, @@ -1187,15 +1184,7 @@ from .moview import MoviewPlayIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE -from .mtv import ( - MTVDEIE, - MTVIE, - MTVItaliaIE, - MTVItaliaProgrammaIE, - MTVJapanIE, - MTVServicesEmbeddedIE, - MTVVideoIE, -) +from .mtv import MTVIE from .muenchentv import MuenchenTVIE from .murrtube import ( MurrtubeIE, diff --git a/yt_dlp/extractor/bet.py 
b/yt_dlp/extractor/bet.py index 3a8e743092..d8fc47f7b3 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -1,79 +1,47 @@ -from .mtv import MTVServicesInfoExtractor -from ..utils import unified_strdate - - -class BetIE(MTVServicesInfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' - _TESTS = [ - { - 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', - 'info_dict': { - 'id': '07e96bd3-8850-3051-b856-271b457f0ab8', - 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', - 'ext': 'flv', - 'title': 'A Conversation With President Obama', - 'description': 'President Obama urges persistence in confronting racism and bias.', - 'duration': 1534, - 'upload_date': '20141208', - 'thumbnail': r're:(?i)^https?://.*\.jpg$', - 'subtitles': { - 'en': 'mincount:2', - }, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, +from .mtv import MTVServicesBaseIE + + +class BetIE(MTVServicesBaseIE): + _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:video-clips|episodes)/(?P<id>[\da-z]{6})' + _TESTS = [{ + 'url': 'https://www.bet.com/video-clips/w9mk7v', + 'info_dict': { + 'id': '3022d121-d191-43fd-b5fb-b2c26f335497', + 'ext': 'mp4', + 'display_id': 'w9mk7v', + 'title': 'New Normal', + 'description': 'md5:d7898c124713b4646cecad9d16ff01f3', + 'duration': 30.08, + 'series': 'Tyler Perry\'s Sistas', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Episode 0', + 'episode_number': 0, + 'timestamp': 1755269073, + 'upload_date': '20250815', }, - { - 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', - 'info_dict': { - 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9', - 'display_id': 'justice-for-ferguson-a-community-reacts', - 'ext': 'flv', - 'title': 'Justice for Ferguson: A Community Reacts', - 'description': 'A BET News special.', - 'duration': 1696, - 'upload_date': '20141125', - 'thumbnail':
r're:(?i)^https?://.*\.jpg$', - 'subtitles': { - 'en': 'mincount:2', - }, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.bet.com/episodes/nmce72/tyler-perry-s-sistas-heavy-is-the-crown-season-9-ep-5', + 'info_dict': { + 'id': '6427562b-3029-11f0-b405-16fff45bc035', + 'ext': 'mp4', + 'display_id': 'nmce72', + 'title': 'Heavy Is the Crown', + 'description': 'md5:1ed345d3157a50572d2464afcc7a652a', + 'channel': 'BET', + 'duration': 2550.0, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref', + 'series': 'Tyler Perry\'s Sistas', + 'season': 'Season 9', + 'season_number': 9, + 'episode': 'Episode 5', + 'episode_number': 5, + 'timestamp': 1755165600, + 'upload_date': '20250814', + 'release_timestamp': 1755129600, + 'release_date': '20250814', }, - ] - - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' - - def _get_feed_query(self, uri): - return { - 'uuid': uri, - } - - def _extract_mgid(self, webpage): - return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - mgid = self._extract_mgid(webpage) - videos_info = self._get_videos_info(mgid) - - info_dict = videos_info['entries'][0] - - upload_date = unified_strdate(self._html_search_meta('date', webpage)) - description = self._html_search_meta('description', webpage) - - info_dict.update({ - 'display_id': display_id, - 'description': description, - 'upload_date': upload_date, - }) - - return info_dict + 'params': {'skip_download': 'm3u8'}, + 'skip': 'Requires provider sign-in', + }] diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py index 27d295bb38..b9f0d66d1c 100644 --- a/yt_dlp/extractor/comedycentral.py +++ b/yt_dlp/extractor/comedycentral.py @@ -1,55 +1,27 @@ -from .mtv import MTVServicesInfoExtractor +from .mtv import 
MTVServicesBaseIE -class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})' - _FEED_URL = 'http://comedycentral.com/feeds/mrss/' - +class ComedyCentralIE(MTVServicesBaseIE): + _VALID_URL = r'https?://(?:www\.)?cc\.com/video-clips/(?P<id>[\da-z]{6})' _TESTS = [{ - 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike', - 'md5': 'b8acb347177c680ff18a292aa2166f80', + 'url': 'https://www.cc.com/video-clips/wl12cx', 'info_dict': { - 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025', + 'id': 'dec6953e-80c8-43b3-96cd-05e9230e704d', 'ext': 'mp4', - 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike', - 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498', - 'timestamp': 1598670000, - 'upload_date': '20200829', + 'display_id': 'wl12cx', + 'title': 'Alison Brie and Dave Franco -"Together"- Extended Interview', + 'description': 'md5:ec68e38d3282f863de9cde0ce5cd231c', + 'duration': 516.76, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref:', + 'series': 'The Daily Show', + 'season': 'Season 30', + 'season_number': 30, + 'episode': 'Episode 0', + 'episode_number': 0, + 'timestamp': 1753973314, + 'upload_date': '20250731', + 'release_timestamp': 1753977914, + 'release_date': '20250731', }, - }, { - 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314', - 'only_matching': True, - }, { - 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate', - 'only_matching': True, - }, { - 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb', - 'only_matching': True, - }, { - 'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas', - 'only_matching': True, + 'params': {'skip_download':
'm3u8'}, }] - - -class ComedyCentralTVIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})' - _TESTS = [{ - 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1', - 'info_dict': { - 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285', - 'ext': 'mp4', - 'title': 'Josh Investigates', - 'description': 'Steht uns das Ende der Welt bevor?', - }, - }] - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - _GEO_COUNTRIES = ['DE'] - - def _get_feed_query(self, uri): - return { - 'accountOverride': 'intl.mtvi.com', - 'arcEp': 'web.cc.tv', - 'ep': 'b9032c3a', - 'imageEp': 'web.cc.tv', - 'mgid': uri, - } diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 34e015dfcd..4c1051eb41 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -1,652 +1,251 @@ -import re -import xml.etree.ElementTree +import base64 +import json +import time +import urllib.parse from .common import InfoExtractor -from ..networking import HEADRequest, Request from ..utils import ( ExtractorError, - RegexNotFoundError, - find_xpath_attr, - fix_xml_ampersands, float_or_none, int_or_none, - join_nonempty, - strip_or_none, - timeconvert, - try_get, - unescapeHTML, + js_to_json, + jwt_decode_hs256, + parse_iso8601, + parse_qs, + update_url, update_url_query, - url_basename, - xpath_text, + url_or_none, ) +from ..utils.traversal import require, traverse_obj -def _media_xml_tag(tag): - return f'{{http://search.yahoo.com/mrss/}}{tag}' - - -class MTVServicesInfoExtractor(InfoExtractor): - _MOBILE_TEMPLATE = None - _LANG = None +class MTVServicesBaseIE(InfoExtractor): + _CACHE_SECTION = 'mtvservices' + _ACCESS_TOKEN_KEY = 'access' + _REFRESH_TOKEN_KEY = 'refresh' + _MEDIA_TOKEN_KEY = 'media' + _token_cache = {} @staticmethod - def _id_from_uri(uri): - return uri.split(':')[-1] + def _jwt_is_expired(token): + return jwt_decode_hs256(token)['exp'] - time.time() < 120
@staticmethod - def _remove_template_parameter(url): - # Remove the templates, like &device={device} - return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - - def _get_feed_url(self, uri, url=None): - return self._FEED_URL - - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('thumbnail')) - thumb_node = itemdoc.find(search_path) - if thumb_node is None: - return None - return thumb_node.get('url') or thumb_node.text or None - - def _extract_mobile_video_formats(self, mtvn_id): - webpage_url = self._MOBILE_TEMPLATE % mtvn_id - req = Request(webpage_url) - # Otherwise we get a webpage that would execute some javascript - req.headers['User-Agent'] = 'curl/7' - webpage = self._download_webpage(req, mtvn_id, - 'Downloading mobile page') - metrics_url = unescapeHTML(self._search_regex(r'.+?_lc_promo.*?)\1', webpage, - 'data zone', default=data_zone, group='zone') - - feed_url = try_get( - triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], - str) - if not feed_url: - return - - feed = self._download_json(feed_url, video_id, fatal=False) - if not feed: - return - - return try_get(feed, lambda x: x['result']['data']['id'], str) - - @staticmethod - def _extract_child_with_type(parent, t): - for c in parent['children']: - if c.get('type') == t: - return c - - def _extract_mgid(self, webpage): - try: - # the url can be http://media.mtvnservices.com/fb/{mgid}.swf - # or http://media.mtvnservices.com/{mgid} - og_url = self._og_search_video_url(webpage) - mgid = url_basename(og_url) - if mgid.endswith('.swf'): - mgid = mgid[:-4] - except RegexNotFoundError: - mgid = None - - if mgid is None or ':' not in mgid: - mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], - webpage, 'mgid', default=None) - - if not mgid: - sm4_embed = self._html_search_meta( - 'sm4:video:embed', webpage, 'sm4 embed', default='') - mgid = self._search_regex( - 
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) - - if not mgid: - mgid = self._extract_triforce_mgid(webpage) - - if not mgid: - data = self._parse_json(self._search_regex( - r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) - main_container = self._extract_child_with_type(data, 'MainContainer') - ab_testing = self._extract_child_with_type(main_container, 'ABTesting') - video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') - if video_player: - mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri']) - else: - flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper') - auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper') - player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player') - if player: - mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid']) - - if not mgid: - raise ExtractorError('Could not extract mgid') - - return mgid - - def _real_extract(self, url): - title = url_basename(url) - webpage = self._download_webpage(url, title) - mgid = self._extract_mgid(webpage) - return self._get_videos_info(mgid, url=url) - - -class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): - IE_NAME = 'mtvservices:embedded' - _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P.+?)(\?|/|$)' - _EMBED_REGEX = [r']+?src=(["\'])(?P(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1'] - - _TEST = { - # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/ - 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906', - 'md5': 'cb349b21a7897164cede95bd7bf3fbb9', - 'info_dict': { - 'id': '1043906', - 'ext': 'mp4', - 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', - 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he 
tries summarizing "Game of Thrones" in under a minute.', - 'timestamp': 1400126400, - 'upload_date': '20140515', - }, - } - - def _get_feed_url(self, uri, url=None): - video_id = self._id_from_uri(uri) - config = self._download_json( - f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge', video_id) - return self._remove_template_parameter(config['feedWithQueryParams']) + if data is not None: + headers['Content-Type'] = 'application/json' + if isinstance(data, dict): + data = json.dumps(data, separators=(',', ':')).encode() + + return self._download_json( + f'https://auth.mtvnservices.com/{path}', display_id, + note=note or 'Calling authentication API', data=data, + headers=headers, query={**self._get_auth_suite_data(config), **(query or {})}) + + def _get_fresh_access_token(self, config, display_id=None, force_refresh=False): + resource_id = config['resourceId'] + # resource_id should already be in _token_cache since _get_media_token is the caller + tokens = self._token_cache[resource_id] + + access_token = tokens.get(self._ACCESS_TOKEN_KEY) + if not force_refresh and access_token and not self._jwt_is_expired(access_token): + return access_token + + if self._REFRESH_TOKEN_KEY not in tokens: + response = self._call_auth_api( + 'accessToken', config, display_id, 'Retrieving auth tokens', data=b'') + else: + response = self._call_auth_api( + 'accessToken/refresh', config, display_id, 'Refreshing auth tokens', + data={'refreshToken': tokens[self._REFRESH_TOKEN_KEY]}, + headers={'Authorization': f'Bearer {access_token}'}) + + tokens[self._ACCESS_TOKEN_KEY] = response['applicationAccessToken'] + tokens[self._REFRESH_TOKEN_KEY] = response['deviceRefreshToken'] + self.cache.store(self._CACHE_SECTION, resource_id, tokens) + + return tokens[self._ACCESS_TOKEN_KEY] + + def _get_media_token(self, video_config, config, display_id=None): + resource_id = config['resourceId'] + if resource_id in self._token_cache: + tokens = 
self._token_cache[resource_id] + else: + tokens = self._token_cache[resource_id] = self.cache.load(self._CACHE_SECTION, resource_id) or {} + + media_token = tokens.get(self._MEDIA_TOKEN_KEY) + if media_token and not self._jwt_is_expired(media_token): + return media_token + + access_token = self._get_fresh_access_token(config, display_id) + if not jwt_decode_hs256(access_token).get('accessMethods'): + mso_id = self.get_param('ap_mso') + if not mso_id: + raise ExtractorError( + 'This video is only available for users of participating TV providers. ' + 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier and pass ' + 'cookies from a browser session where you are signed-in to your provider.', expected=True) + + auth_suite_data = json.dumps( + self._get_auth_suite_data(config), separators=(',', ':')).encode() + callback_url = update_url_query(config['callbackURL'], { + 'authSuiteData': urllib.parse.quote(base64.b64encode(auth_suite_data).decode()), + 'mvpdCode': mso_id, + }) + auth_url = self._call_auth_api( + f'mvpd/{mso_id}/login', config, display_id, + 'Retrieving provider authentication URL', + query={'callbackUrl': callback_url}, + headers={'Authorization': f'Bearer {access_token}'})['authenticationUrl'] + res = self._download_webpage_handle(auth_url, display_id, 'Downloading provider auth page') + # BEGIN provider-specific code + redirect_url = self._search_json( + r'initInterstitialRedirect\(', res[0], 'redirect JSON', + display_id, transform_source=js_to_json)['continue'] + urlh = self._request_webpage(redirect_url, display_id, 'Requesting provider redirect page') + authorization_code = parse_qs(urlh.url)['authorizationCode'][-1] + # END provider-specific code + self._call_auth_api( + f'access/mvpd/{mso_id}', config, display_id, + 'Submitting authorization code to MTVNServices', + query={'authorizationCode': authorization_code}, data=b'', + headers={'Authorization': f'Bearer {access_token}'}) + access_token = 
self._get_fresh_access_token(config, display_id, force_refresh=True) + + tokens[self._MEDIA_TOKEN_KEY] = self._call_auth_api( + 'mediaToken', config, display_id, 'Fetching media token', data={ + 'content': {('id' if k == 'videoId' else k): v for k, v in video_config.items()}, + 'resourceId': resource_id, + }, headers={'Authorization': f'Bearer {access_token}'})['mediaToken'] + + self.cache.store(self._CACHE_SECTION, resource_id, tokens) + return tokens[self._MEDIA_TOKEN_KEY] def _real_extract(self, url): - mobj = self._match_valid_url(url) - mgid = mobj.group('mgid') - return self._get_videos_info(mgid) - - -class MTVIE(MTVServicesInfoExtractor): - IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' - _FEED_URL = 'http://www.mtv.com/feeds/mrss/' + display_id = self._match_id(url) + + data = self._download_json(update_url(url, query=None), display_id, query={'json': 'true'}) + flex_wrapper = traverse_obj(data, ( + 'children', lambda _, v: v['type'] == 'MainContainer', + (None, ('children', lambda _, v: v['type'] == 'AviaWrapper')), + 'children', lambda _, v: v['type'] == 'FlexWrapper', {dict}, any)) + video_detail = traverse_obj(flex_wrapper, ( + (None, ('children', lambda _, v: v['type'] == 'AuthSuiteWrapper')), + 'children', lambda _, v: v['type'] == 'Player', + 'props', 'videoDetail', {dict}, any)) + if not video_detail: + video_detail = traverse_obj(data, ( + 'children', ..., ('handleTVEAuthRedirection', None), + 'videoDetail', {dict}, any, {require('video detail')})) + mgid = video_detail['mgid'] + video_id = mgid.rpartition(':')[2] + service_url = traverse_obj(video_detail, ('videoServiceUrl', {url_or_none}, {update_url(query=None)})) + + headers = {} + if video_detail.get('authRequired'): + video_config = traverse_obj(flex_wrapper, ( + 'children', lambda _, v: v['type'] == 'AuthSuiteWrapper', + 'props', 'videoConfig', {dict}, any, {require('video config')})) + config = traverse_obj(data, ( + 'props', 
'authSuiteConfig', {dict}, {require('auth suite config')})) + headers['X-VIA-TVE-MEDIATOKEN'] = self._get_media_token(video_config, config, display_id) + + stream_info = self._download_json( + service_url or f'https://topaz.paramount.tech/topaz/api/{mgid}/mica.json', + video_id, 'Downloading API JSON', 'Unable to download API JSON', + query={'clientPlatform': 'desktop'}, headers=headers)['stitchedstream'] + + manifest_type = stream_info['manifesttype'] + if manifest_type == 'hls': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + stream_info['source'], video_id, 'mp4', m3u8_id=manifest_type) + elif manifest_type == 'dash': + formats, subtitles = self._extract_mpd_formats_and_subtitles( + stream_info['source'], video_id, mpd_id=manifest_type) + else: + self.raise_no_formats(f'Unsupported manifest type "{manifest_type}"') + formats, subtitles = [], {} - _TESTS = [{ - 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer', - 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', - 'info_dict': { - 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', - 'ext': 'mp4', - 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', - 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', - 'timestamp': 1468846800, - 'upload_date': '20160718', - }, - }, { - 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', - 'only_matching': True, - }, { - 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713', - 'only_matching': True, - }] - - -class MTVJapanIE(MTVServicesInfoExtractor): - IE_NAME = 'mtvjapan' - _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P[0-9a-z]+)' - - _TEST = { - 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade', - 'info_dict': { - 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5', - 'ext': 'mp4', - 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition', - }, - 'params': { - 'skip_download': True, - }, - } - 
_GEO_COUNTRIES = ['JP'] - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - - def _get_feed_query(self, uri): return { - 'arcEp': 'mtvjapan.com', - 'mgid': uri, + **traverse_obj(video_detail, { + 'title': ('title', {str}), + 'channel': ('channel', 'name', {str}), + 'thumbnails': ('images', ..., {'url': ('url', {url_or_none})}), + 'description': (('fullDescription', 'description'), {str}, any), + 'series': ('parentEntity', 'title', {str}), + 'season_number': ('seasonNumber', {int_or_none}), + 'episode_number': ('episodeAiringOrder', {int_or_none}), + 'duration': ('duration', 'milliseconds', {float_or_none(scale=1000)}), + 'timestamp': (( + ('originalPublishDate', {parse_iso8601}), + ('publishDate', 'timestamp', {int_or_none})), any), + 'release_timestamp': (( + ('originalAirDate', {parse_iso8601}), + ('airDate', 'timestamp', {int_or_none})), any), + }), + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, } -class MTVVideoIE(MTVServicesInfoExtractor): - IE_NAME = 'mtv:video' - _VALID_URL = r'''(?x)^https?:// - (?:(?:www\.)?mtv\.com/videos/.+?/(?P[0-9]+)/[^/]+$| - m\.mtv\.com/videos/video\.rbml\?.*?id=(?P[^&]+))''' - - _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' - - _TESTS = [ - { - 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', - 'info_dict': { - 'id': '853555', - 'ext': 'mp4', - 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', - 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', - 'timestamp': 1352610000, - 'upload_date': '20121111', - }, - }, - ] - - def _get_thumbnail_url(self, uri, itemdoc): - return 'http://mtv.mtvnimages.com/uri/' + uri - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('videoid') - uri = mobj.groupdict().get('mgid') - if uri is None: - webpage = self._download_webpage(url, video_id) - - # Some videos 
come from Vevo.com - m_vevo = re.search( - r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage) - if m_vevo: - vevo_id = m_vevo.group(1) - self.to_screen(f'Vevo video detected: {vevo_id}') - return self.url_result(f'vevo:{vevo_id}', ie='Vevo') - - uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') - return self._get_videos_info(uri) - - -class MTVDEIE(MTVServicesInfoExtractor): - _WORKING = False - IE_NAME = 'mtv.de' - _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)' +class MTVIE(MTVServicesBaseIE): + IE_NAME = 'mtv' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|episodes)/(?P<id>[\da-z]{6})' _TESTS = [{ - 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum', - 'info_dict': { - 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5', - 'ext': 'mp4', - 'title': 'Traum', - 'description': 'Traum', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Blocked at Travis CI', - }, { - # mediagen URL without query (e.g.
http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) - 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1', + 'url': 'https://www.mtv.com/video-clips/syolsj', 'info_dict': { - 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285', + 'id': '213ea7f8-bac7-4a43-8cd5-8d8cb8c8160f', 'ext': 'mp4', - 'title': 'Teen Mom 2', - 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Blocked at Travis CI', + 'display_id': 'syolsj', + 'title': 'The Challenge: Vets & New Threats', + 'description': 'md5:c4d2e90a5fff6463740fbf96b2bb6a41', + 'duration': 95.0, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref', + 'series': 'The Challenge', + 'season': 'Season 41', + 'season_number': 41, + 'episode': 'Episode 0', + 'episode_number': 0, + 'timestamp': 1753945200, + 'upload_date': '20250731', + 'release_timestamp': 1753945200, + 'release_date': '20250731', + }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3', + 'url': 'https://www.mtv.com/episodes/uzvigh', 'info_dict': { - 'id': 'local_playlist-4e760566473c4c8c5344', + 'id': '364e8b9e-e415-11ef-b405-16fff45bc035', 'ext': 'mp4', - 'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1', - 'description': 'MTV Movies Supercut', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', + 'display_id': 'uzvigh', + 'title': 'CT Tamburello and Johnny Bananas', + 'description': 'md5:364cea52001e9c13f92784e3365c6606', + 'channel': 'MTV', + 'duration': 1260.0, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref', + 'series': 'Ridiculousness', + 'season': 'Season 47', + 'season_number': 47, + 'episode': 'Episode 19', + 'episode_number': 19, + 'timestamp': 1753318800, + 'upload_date': '20250724', + 'release_timestamp': 
1753318800, + 'release_date': '20250724', + }, + 'params': {'skip_download': 'm3u8'}, }] - _GEO_COUNTRIES = ['DE'] - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - - def _get_feed_query(self, uri): - return { - 'arcEp': 'mtv.de', - 'mgid': uri, - } - - -class MTVItaliaIE(MTVServicesInfoExtractor): - IE_NAME = 'mtv.it' - _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P[0-9a-z]+)' - _TESTS = [{ - 'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1', - 'info_dict': { - 'id': '0f0fc78e-45fc-4cce-8f24-971c25477530', - 'ext': 'mp4', - 'title': 'Cavoli amario (episodio completo)', - 'description': 'md5:4962bccea8fed5b7c03b295ae1340660', - 'series': 'Mario - Una Serie Di Maccio Capatonda', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - 'skip_download': True, - }, - }] - _GEO_COUNTRIES = ['IT'] - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - - def _get_feed_query(self, uri): - return { - 'arcEp': 'mtv.it', - 'mgid': uri, - } - - -class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete IE - IE_NAME = 'mtv.it:programma' - _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P[0-9a-z]+)' - _TESTS = [{ - # program page: general - 'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda', - 'info_dict': { - 'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d', - 'title': 'Mario - Una Serie Di Maccio Capatonda', - 'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153', - }, - 'playlist_count': 2, - 'params': { - 'skip_download': True, - }, - }, { - # program page: specific season - 'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2', - 'info_dict': { - 'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1', - 'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2', - }, - 'playlist_count': 34, - 'params': { - 'skip_download': True, - }, - }, { - 
# playlist page + redirect - 'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal', - 'info_dict': { - 'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359', - 'title': 'Sexy Videos', - }, - 'playlist_mincount': 145, - 'params': { - 'skip_download': True, - }, - }] - _GEO_COUNTRIES = ['IT'] - _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8' - - def _get_entries(self, title, url): - while True: - pg = self._search_regex(r'/(\d+)$', url, 'entries', '1') - entries = self._download_json(url, title, f'page {pg}') - url = try_get( - entries, lambda x: x['result']['nextPageURL'], str) - entries = try_get( - entries, ( - lambda x: x['result']['data']['items'], - lambda x: x['result']['data']['seasons']), - list) - for entry in entries or []: - if entry.get('canonicalURL'): - yield self.url_result(entry['canonicalURL']) - if not url: - break - - def _real_extract(self, url): - query = {'url': url} - info_url = update_url_query(self._FEED_URL, query) - video_id = self._match_id(url) - info = self._download_json(info_url, video_id).get('manifest') - - redirect = try_get( - info, lambda x: x['newLocation']['url'], str) - if redirect: - return self.url_result(redirect) - - title = info.get('title') - video_id = try_get( - info, lambda x: x['reporting']['itemId'], str) - parent_id = try_get( - info, lambda x: x['reporting']['parentId'], str) - - playlist_url = current_url = None - for z in (info.get('zones') or {}).values(): - if z.get('moduleName') in ('INTL_M304', 'INTL_M209'): - info_url = z.get('feed') - if z.get('moduleName') in ('INTL_M308', 'INTL_M317'): - playlist_url = playlist_url or z.get('feed') - if z.get('moduleName') in ('INTL_M300',): - current_url = current_url or z.get('feed') - - if not info_url: - raise ExtractorError('No info found') - - if video_id == parent_id: - video_id = self._search_regex( - r'([^\/]+)/[^\/]+$', info_url, 'video_id') - - info = self._download_json(info_url, video_id, 'Show infos') - info = try_get(info, lambda x: x['result']['data'], 
dict) - title = title or try_get( - info, ( - lambda x: x['title'], - lambda x: x['headline']), - str) - description = try_get(info, lambda x: x['content'], str) - - if current_url: - season = try_get( - self._download_json(playlist_url, video_id, 'Seasons info'), - lambda x: x['result']['data'], dict) - current = try_get( - season, lambda x: x['currentSeason'], str) - seasons = try_get( - season, lambda x: x['seasons'], list) or [] - - if current in [s.get('eTitle') for s in seasons]: - playlist_url = current_url - - title = re.sub( - r'[-|]\s*(?:mtv\s*italia|programma|playlist)', - '', title, flags=re.IGNORECASE).strip() - - return self.playlist_result( - self._get_entries(title, playlist_url), - video_id, title, description) diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index 653b10b9d0..00108e8328 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -1,91 +1,57 @@ -from .mtv import MTVServicesInfoExtractor -from ..utils import update_url_query +from .mtv import MTVServicesBaseIE -class NickIE(MTVServicesInfoExtractor): +class NickIE(MTVServicesBaseIE): IE_NAME = 'nick.com' - _VALID_URL = r'https?://(?P(?:www\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?Pvideos/clip|[^/]+/videos|episodes/[^/]+)/(?P[^/?#.]+)' - _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' + _VALID_URL = r'https?://(?:www\.)?nick\.com/(?:video-clips|episodes)/(?P[\da-z]{6})' _GEO_COUNTRIES = ['US'] _TESTS = [{ - 'url': 'https://www.nick.com/episodes/sq47rw/spongebob-squarepants-a-place-for-pets-lockdown-for-love-season-13-ep-1', + 'url': 'https://www.nick.com/episodes/u3smw8/wylde-pak-best-summer-ever-season-1-ep-1', 'info_dict': { - 'description': 'md5:0650a9eb88955609d5c1d1c79292e234', - 'title': 'A Place for Pets/Lockdown for Love', + 'id': 'eb9d4db0-274a-11ef-a913-0e37995d42c9', + 'ext': 'mp4', + 'display_id': 'u3smw8', + 'title': 'Best Summer Ever?', + 'description': 'md5:c737a0ade3fbc09d569c3b3d029a7792', + 'channel': 'Nickelodeon', + 
'duration': 1296.0, + 'thumbnail': r're:https://assets\.nick\.com/uri/mgid:arc:imageassetref:', + 'series': 'Wylde Pak', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1746100800, + 'upload_date': '20250501', + 'release_timestamp': 1746100800, + 'release_date': '20250501', }, - 'playlist': [ - { - 'md5': 'cb8a2afeafb7ae154aca5a64815ec9d6', - 'info_dict': { - 'id': '85ee8177-d6ce-48f8-9eee-a65364f8a6df', - 'ext': 'mp4', - 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S1', - 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - - }, - }, - { - 'md5': '839a04f49900a1fcbf517020d94e0737', - 'info_dict': { - 'id': '2e2a9960-8fd4-411d-868b-28eb1beb7fae', - 'ext': 'mp4', - 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S2', - 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - - }, - }, - { - 'md5': 'f1145699f199770e2919ee8646955d46', - 'info_dict': { - 'id': 'dc91c304-6876-40f7-84a6-7aece7baa9d0', - 'ext': 'mp4', - 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S3', - 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. 
Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - - }, - }, - { - 'md5': 'd463116875aee2585ee58de3b12caebd', - 'info_dict': { - 'id': '5d929486-cf4c-42a1-889a-6e0d183a101a', - 'ext': 'mp4', - 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S4', - 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - - }, - }, - ], + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://www.nickjr.com/blues-clues-and-you/videos/blues-clues-and-you-original-209-imagination-station/', + 'url': 'https://www.nick.com/video-clips/0p4706/spongebob-squarepants-spongebob-loving-the-krusty-krab-for-7-minutes', 'info_dict': { - 'id': '31631529-2fc5-430b-b2ef-6a74b4609abd', + 'id': '4aac2228-5295-4076-b986-159513cf4ce4', 'ext': 'mp4', - 'description': 'md5:9d65a66df38e02254852794b2809d1cf', - 'title': 'Blue\'s Imagination Station', + 'display_id': '0p4706', + 'title': 'SpongeBob Loving the Krusty Krab for 7 Minutes!', + 'description': 'md5:72bf59babdf4e6d642187502864e111d', + 'duration': 423.423, + 'thumbnail': r're:https://assets\.nick\.com/uri/mgid:arc:imageassetref:', + 'series': 'SpongeBob SquarePants', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Episode 0', + 'episode_number': 0, + 'timestamp': 1663819200, + 'upload_date': '20220922', }, - 'skip': 'Not accessible?', + 'params': {'skip_download': 'm3u8'}, }] - def _get_feed_query(self, uri): - return { - 'feed': 'nick_arc_player_prime', - 'mgid': uri, - } - def _real_extract(self, url): - domain, video_type, display_id = self._match_valid_url(url).groups() - if video_type.startswith('episodes'): - return super()._real_extract(url) - video_data = self._download_json( - f'http://{domain}/data/video.endLevel.json', - 
display_id, query={ - 'urlKey': display_id, - }) - return self._get_videos_info(video_data['player'] + video_data['id']) - - -class NickBrIE(MTVServicesInfoExtractor): +class NickBrIE(MTVServicesBaseIE): IE_NAME = 'nickelodeon:br' + _WORKING = False _VALID_URL = r'''(?x) https?:// (?: @@ -112,42 +78,10 @@ class NickBrIE(MTVServicesInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - domain, display_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, display_id) - uri = self._search_regex( - r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid') - video_id = self._id_from_uri(uri) - config = self._download_json( - 'http://media.mtvnservices.com/pmt/e1/access/index.html', - video_id, query={ - 'uri': uri, - 'configtype': 'edge', - }, headers={ - 'Referer': url, - }) - info_url = self._remove_template_parameter(config['feedWithQueryParams']) - if info_url == 'None': - if domain.startswith('www.'): - domain = domain[4:] - content_domain = { - 'mundonick.uol': 'mundonick.com.br', - 'nickjr': 'br.nickelodeonjunior.tv', - }[domain] - query = { - 'mgid': uri, - 'imageEp': content_domain, - 'arcEp': content_domain, - } - if domain == 'nickjr.com.br': - query['ep'] = 'c4b16088' - info_url = update_url_query( - 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query) - return self._get_videos_info_from_url(info_url, video_id) - -class NickDeIE(MTVServicesInfoExtractor): +class NickDeIE(MTVServicesBaseIE): IE_NAME = 'nick.de' + _WORKING = False _VALID_URL = r'https?://(?:www\.)?(?Pnick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', @@ -181,15 +115,10 @@ class NickDeIE(MTVServicesInfoExtractor): 'only_matching': True, }] - def _get_feed_url(self, uri, url=None): - video_id = self._id_from_uri(uri) - config = self._download_json( - 
f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id) - return self._remove_template_parameter(config['feedWithQueryParams']) - -class NickRuIE(MTVServicesInfoExtractor): +class NickRuIE(MTVServicesBaseIE): IE_NAME = 'nickelodeonru' + _WORKING = False _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6', @@ -216,9 +145,3 @@ class NickRuIE(MTVServicesInfoExtractor): 'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy', 'only_matching': True, }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - mgid = self._extract_mgid(webpage, url) - return self.url_result(f'http://media.mtvnservices.com/embed/{mgid}') diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 3d661a86ac..70888e42ca 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,44 +1,64 @@ -from .mtv import MTVServicesInfoExtractor +from .mtv import MTVServicesBaseIE -class SouthParkIE(MTVServicesInfoExtractor): +class SouthParkIE(MTVServicesBaseIE): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?Psouthpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P.+?)(\?|#|$))' - - _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - + _VALID_URL = r'https?://(?:www\.)?southpark(?:\.cc|studios)\.com/(?:video-clips|episodes|collections)/(?P[\da-z]{6})' _TESTS = [{ 'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling', 'info_dict': { + 'id': '31929ad5-8269-11eb-8774-70df2f866ace', 'ext': 'mp4', + 'display_id': 'd7wr06', 'title': 'You All Agreed to Counseling', - 'description': 'Kenny, Cartman, Stan, and Kyle visit 
Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.', + 'description': 'md5:01f78fb306c7042f3f05f3c78edfc212', + 'duration': 134.552, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref:', + 'series': 'South Park', + 'season': 'Season 24', + 'season_number': 24, + 'episode': 'Episode 2', + 'episode_number': 2, 'timestamp': 1615352400, 'upload_date': '20210310', + 'release_timestamp': 1615352400, + 'release_date': '20210310', }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', + 'url': 'https://southpark.cc.com/episodes/940f8z/south-park-cartman-gets-an-anal-probe-season-1-ep-1', + 'info_dict': { + 'id': '5fb8887e-ecfd-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 'display_id': '940f8z', + 'title': 'Cartman Gets An Anal Probe', + 'description': 'md5:964e1968c468545752feef102b140300', + 'channel': 'Comedy Central', + 'duration': 1319.0, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref:', + 'series': 'South Park', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 871473600, + 'upload_date': '19970813', + 'release_timestamp': 871473600, + 'release_date': '19970813', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://southpark.cc.com/collections/dejukt/south-park-best-of-mr-mackey/tphx9j', 'only_matching': True, }, { 'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1', 'only_matching': True, }] - def _get_feed_query(self, uri): - return { - 'accountOverride': 'intl.mtvi.com', - 'arcEp': 'shared.southpark.global', - 'ep': '90877963', - 'imageEp': 'shared.southpark.global', - 'mgid': uri, - } - -class SouthParkEsIE(SouthParkIE): # XXX: Do not subclass from concrete IE +class SouthParkEsIE(MTVServicesBaseIE): IE_NAME = 
'southpark.cc.com:español' _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.cc\.com/es/episodios/(?P.+?)(\?|#|$))' - _LANG = 'es' - _TESTS = [{ 'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', 'info_dict': { @@ -50,9 +70,10 @@ class SouthParkEsIE(SouthParkIE): # XXX: Do not subclass from concrete IE }] -class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE +class SouthParkDeIE(MTVServicesBaseIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P(?P.+?)/.+?)(?:\?|#|$))' + _GEO_COUNTRIES = ['DE'] _TESTS = [{ 'url': 'https://www.southpark.de/videoclip/rsribv/south-park-rueckzug-zum-gummibonbon-wald', 'only_matching': True, @@ -99,19 +120,11 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE }, }] - def _get_feed_url(self, uri, url=None): - video_id = self._id_from_uri(uri) - config = self._download_json( - f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id) - return self._remove_template_parameter(config['feedWithQueryParams']) - - def _get_feed_query(self, uri): - return - -class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE +class SouthParkLatIE(MTVServicesBaseIE): IE_NAME = 'southpark.lat' _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P[^/?#&]+)' + _GEO_COUNTRIES = ['BR'] _TESTS = [{ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman', 'only_matching': True, @@ -141,22 +154,11 @@ class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE }, }] - def _get_feed_url(self, uri, url=None): - video_id = self._id_from_uri(uri) - config = self._download_json( - 
f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', - video_id) - return self._remove_template_parameter(config['feedWithQueryParams']) - - def _get_feed_query(self, uri): - return - -class SouthParkNlIE(SouthParkIE): # XXX: Do not subclass from concrete IE +class SouthParkNlIE(MTVServicesBaseIE): IE_NAME = 'southpark.nl' _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' - + _GEO_COUNTRIES = ['NL'] _TESTS = [{ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', 'info_dict': { @@ -167,11 +169,10 @@ class SouthParkNlIE(SouthParkIE): # XXX: Do not subclass from concrete IE }] -class SouthParkDkIE(SouthParkIE): # XXX: Do not subclass from concrete IE +class SouthParkDkIE(MTVServicesBaseIE): IE_NAME = 'southparkstudios.dk' _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P.+?)(\?|#|$))' - _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' - + _GEO_COUNTRIES = ['DK'] _TESTS = [{ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', 'info_dict': { diff --git a/yt_dlp/extractor/spike.py b/yt_dlp/extractor/spike.py index 5c1c78d8fc..c4e9d9620f 100644 --- a/yt_dlp/extractor/spike.py +++ b/yt_dlp/extractor/spike.py @@ -1,7 +1,7 @@ -from .mtv import MTVServicesInfoExtractor +from .mtv import MTVServicesBaseIE -class BellatorIE(MTVServicesInfoExtractor): +class BellatorIE(MTVServicesBaseIE): _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', @@ -19,7 +19,7 @@ class BellatorIE(MTVServicesInfoExtractor): _GEO_COUNTRIES = ['US'] -class ParamountNetworkIE(MTVServicesInfoExtractor): +class ParamountNetworkIE(MTVServicesBaseIE): _VALID_URL = 
r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13', diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py index 481d5eb19e..6e0117eb0e 100644 --- a/yt_dlp/extractor/tvland.py +++ b/yt_dlp/extractor/tvland.py @@ -1,9 +1,9 @@ -from .mtv import MTVServicesInfoExtractor +from .mtv import MTVServicesBaseIE # TODO: Remove - Reason not used anymore - Service moved to youtube -class TVLandIE(MTVServicesInfoExtractor): +class TVLandIE(MTVServicesBaseIE): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index 53d5a7108e..4a462ce7cb 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -1,33 +1,50 @@ -from .mtv import MTVServicesInfoExtractor +from .mtv import MTVServicesBaseIE -# TODO: Remove - Reason: Outdated Site - -class VH1IE(MTVServicesInfoExtractor): +class VH1IE(MTVServicesBaseIE): IE_NAME = 'vh1.com' - _FEED_URL = 'http://www.vh1.com/feeds/mrss/' + _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P[\da-z]{6})' _TESTS = [{ - 'url': 'https://www.vh1.com/episodes/0aqivv/nick-cannon-presents-wild-n-out-foushee-season-16-ep-12', + 'url': 'https://www.vh1.com/episodes/d06ta1/barely-famous-barely-famous-season-1-ep-1', 'info_dict': { - 'title': 'Fousheé', - 'description': 'Fousheé joins Team Evolutions fight against Nick and Team Revolution in Baby Daddy, Baby Mama; Kick Em Out the Classroom; Backseat of My Ride and Wildstyle; and Fousheé performs.', + 'id': '4af4cf2c-a854-11e4-9596-0026b9414f30', + 'ext': 'mp4', + 'display_id': 'd06ta1', + 'title': 'Barely Famous', + 'description': 'md5:6da5c9d88012eba0a80fc731c99b5fed', + 'channel': 'VH1', + 'duration': 1280.0, + 'thumbnail': 
r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref:', + 'series': 'Barely Famous', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1426680000, + 'upload_date': '20150318', + 'release_timestamp': 1426680000, + 'release_date': '20150318', }, - 'playlist_mincount': 4, - 'skip': '404 Not found', + 'params': {'skip_download': 'm3u8'}, }, { - # Clip - 'url': 'https://www.vh1.com/video-clips/e0sja0/nick-cannon-presents-wild-n-out-foushee-clap-for-him', + 'url': 'https://www.vh1.com/video-clips/ryzt2n/love-hip-hop-miami-love-hip-hop-miami-season-5-recap', 'info_dict': { - 'id': 'a07563f7-a37b-4e7f-af68-85855c2c7cc3', + 'id': '59e62974-4a5c-4417-91c3-5044cb2f4ce2', 'ext': 'mp4', - 'title': 'Fousheé - "clap for him"', - 'description': 'Singer Fousheé hits the Wild N Out: In the Dark stage with a performance of the tongue-in-cheek track "clap for him" from her 2021 album "time machine."', - 'upload_date': '20210826', - }, - 'params': { - # m3u8 download - 'skip_download': True, + 'display_id': 'ryzt2n', + 'title': 'Love & Hip Hop Miami - Season 5 Recap', + 'description': 'md5:4e49c65d0007bfc8d06db555a6b76ef0', + 'duration': 792.083, + 'thumbnail': r're:https://images\.paramount\.tech/uri/mgid:arc:imageassetref:', + 'series': 'Love & Hip Hop Miami', + 'season': 'Season 6', + 'season_number': 6, + 'episode': 'Episode 0', + 'episode_number': 0, + 'timestamp': 1732597200, + 'upload_date': '20241126', + 'release_timestamp': 1732597200, + 'release_date': '20241126', }, + 'params': {'skip_download': 'm3u8'}, }] - - _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P[^/?#.]+)'