From 65e90aea29cf3bfc9d1ae3e009fbf9a8db3a23c9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 12 Sep 2025 03:15:41 -0500 Subject: [PATCH] [cleanup] Remove broken extractors (#14305) Closes #1466, Closes #2005, Closes #4897, Closes #5118, Closes #8489, Closes #13072 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 15 -- yt_dlp/extractor/cbsnews.py | 10 +- yt_dlp/extractor/crackle.py | 243 ------------------------------ yt_dlp/extractor/cwtv.py | 180 ---------------------- yt_dlp/extractor/paramountplus.py | 201 ------------------------ yt_dlp/extractor/sixplay.py | 119 --------------- yt_dlp/extractor/spotify.py | 167 -------------------- yt_dlp/extractor/unsupported.py | 53 ++++++- yt_dlp/extractor/xanimu.py | 52 ------- 9 files changed, 47 insertions(+), 993 deletions(-) delete mode 100644 yt_dlp/extractor/crackle.py delete mode 100644 yt_dlp/extractor/cwtv.py delete mode 100644 yt_dlp/extractor/paramountplus.py delete mode 100644 yt_dlp/extractor/sixplay.py delete mode 100644 yt_dlp/extractor/spotify.py delete mode 100644 yt_dlp/extractor/xanimu.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b3dd52b504..9d3d353683 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -424,7 +424,6 @@ from .cpac import ( CPACPlaylistIE, ) from .cracked import CrackedIE -from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( @@ -444,10 +443,6 @@ from .curiositystream import ( CuriosityStreamIE, CuriosityStreamSeriesIE, ) -from .cwtv import ( - CWTVIE, - CWTVMovieIE, -) from .cybrary import ( CybraryCourseIE, CybraryIE, @@ -1467,10 +1462,6 @@ from .panopto import ( PanoptoListIE, PanoptoPlaylistIE, ) -from .paramountplus import ( - ParamountPlusIE, - ParamountPlusSeriesIE, -) from .parler import ParlerIE from .parlview import ParlviewIE from .parti import ( @@ -1849,7 +1840,6 
@@ from .simplecast import ( SimplecastPodcastIE, ) from .sina import SinaIE -from .sixplay import SixPlayIE from .skeb import SkebIE from .sky import ( SkyNewsIE, @@ -1930,10 +1920,6 @@ from .spiegel import SpiegelIE from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE -from .spotify import ( - SpotifyIE, - SpotifyShowIE, -) from .spreaker import ( SpreakerIE, SpreakerShowIE, @@ -2477,7 +2463,6 @@ from .wykop import ( WykopPostCommentIE, WykopPostIE, ) -from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( XHamsterEmbedIE, diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index b01c0efd5d..59457813f5 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -5,8 +5,6 @@ import zlib from .anvato import AnvatoIE from .common import InfoExtractor -from .paramountplus import ParamountPlusIE -from ..networking import HEADRequest from ..utils import ( ExtractorError, UserNotLive, @@ -132,13 +130,7 @@ class CBSNewsEmbedIE(CBSNewsBaseIE): video_id = item['mpxRefId'] video_url = self._get_video_url(item) if not video_url: - # Old embeds redirect user to ParamountPlus but most links are 404 - pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}' - try: - self._request_webpage(HEADRequest(pplus_url), video_id) - return self.url_result(pplus_url, ParamountPlusIE) - except ExtractorError: - self.raise_no_formats('This video is no longer available', True, video_id) + raise ExtractorError('This video is no longer available', expected=True) return self._extract_video(item, video_url, video_id) diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py deleted file mode 100644 index c4ceba9408..0000000000 --- a/yt_dlp/extractor/crackle.py +++ /dev/null @@ -1,243 +0,0 @@ -import hashlib -import hmac -import re -import time - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import 
( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - orderedSet, - parse_age_limit, - parse_duration, - url_or_none, -) - - -class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P\d+)' - _TESTS = [{ - # Crackle is available in the United States and territories - 'url': 'https://www.crackle.com/thanksgiving/2510064', - 'info_dict': { - 'id': '2510064', - 'ext': 'mp4', - 'title': 'Touch Football', - 'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df', - 'duration': 1398, - 'view_count': int, - 'average_rating': 0, - 'age_limit': 17, - 'genre': 'Comedy', - 'creator': 'Daniel Powell', - 'artist': 'Chris Elliott, Amy Sedaris', - 'release_year': 2016, - 'series': 'Thanksgiving', - 'episode': 'Touch Football', - 'season_number': 1, - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'expected_warnings': [ - 'Trying with a list of known countries', - ], - }, { - 'url': 'https://www.sonycrackle.com/thanksgiving/2510064', - 'only_matching': True, - }] - - _MEDIA_FILE_SLOTS = { - '360p.mp4': { - 'width': 640, - 'height': 360, - }, - '480p.mp4': { - 'width': 768, - 'height': 432, - }, - '480p_1mbps.mp4': { - 'width': 852, - 'height': 480, - }, - } - - def _download_json(self, url, *args, **kwargs): - # Authorization generation algorithm is reverse engineered from: - # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js - timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) - h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() - headers = { - 'Accept': 'application/json', - 'Authorization': '|'.join([h, timestamp, '117', '1']), - } - return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs) - - def _real_extract(self, url): - video_id = self._match_id(url) - - geo_bypass_country = self.get_param('geo_bypass_country', None) - countries = 
orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', '')) - num_countries, num = len(countries) - 1, 0 - - media = {} - for num, country in enumerate(countries): - if num == 1: # start hard-coded list - self.report_warning('%s. Trying with a list of known countries' % ( - f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country - else 'No country code was given using --geo-bypass-country')) - elif num == num_countries: # end of list - geo_info = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/geo/country', - video_id, fatal=False, note='Downloading geo-location information from crackle API', - errnote='Unable to fetch geo-location information from crackle') or {} - country = geo_info.get('CountryCode') - if country is None: - continue - self.to_screen(f'{self.IE_NAME} identified country as {country}') - if country in countries: - self.to_screen(f'Downloading from {country} API was already attempted. 
Skipping...') - continue - - if country is None: - continue - try: - media = self._download_json( - f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true', - video_id, note=f'Downloading media JSON from {country} API', - errnote='Unable to download media JSON') - except ExtractorError as e: - # 401 means geo restriction, trying next country - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - continue - raise - - status = media.get('status') - if status.get('messageCode') != '0': - raise ExtractorError( - '{} said: {} {} - {}'.format( - self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')), - expected=True) - - # Found video formats - if isinstance(media.get('MediaURLs'), list): - break - - ignore_no_formats = self.get_param('ignore_no_formats_error') - - if not media or (not media.get('MediaURLs') and not ignore_no_formats): - raise ExtractorError( - 'Unable to access the crackle API. Try passing your country code ' - 'to --geo-bypass-country. 
If it still does not work and the ' - 'video is available in your country') - title = media['Title'] - - formats, subtitles = [], {} - has_drm = False - for e in media.get('MediaURLs') or []: - if e.get('UseDRM'): - has_drm = True - format_url = url_or_none(e.get('DRMPath')) - else: - format_url = url_or_none(e.get('Path')) - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - fmts, subs = self._extract_m3u8_formats_and_subtitles( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - elif ext == 'mpd': - fmts, subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id='dash', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - elif format_url.endswith('.ism/Manifest'): - fmts, subs = self._extract_ism_formats_and_subtitles( - format_url, video_id, ism_id='mss', fatal=False) - formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) - else: - mfs_path = e.get('Type') - mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path) - if not mfs_info: - continue - formats.append({ - 'url': format_url, - 'format_id': 'http-' + mfs_path.split('.')[0], - 'width': mfs_info['width'], - 'height': mfs_info['height'], - }) - if not formats and has_drm: - self.report_drm(video_id) - - description = media.get('Description') - duration = int_or_none(media.get( - 'DurationInSeconds')) or parse_duration(media.get('Duration')) - view_count = int_or_none(media.get('CountViews')) - average_rating = float_or_none(media.get('UserRating')) - age_limit = parse_age_limit(media.get('Rating')) - genre = media.get('Genre') - release_year = int_or_none(media.get('ReleaseYear')) - creator = media.get('Directors') - artist = media.get('Cast') - - if media.get('MediaTypeDisplayValue') == 'Full Episode': - series = media.get('ShowName') - episode = title - season_number = 
int_or_none(media.get('Season')) - episode_number = int_or_none(media.get('Episode')) - else: - series = episode = season_number = episode_number = None - - cc_files = media.get('ClosedCaptionFiles') - if isinstance(cc_files, list): - for cc_file in cc_files: - if not isinstance(cc_file, dict): - continue - cc_url = url_or_none(cc_file.get('Path')) - if not cc_url: - continue - lang = cc_file.get('Locale') or 'en' - subtitles.setdefault(lang, []).append({'url': cc_url}) - - thumbnails = [] - images = media.get('Images') - if isinstance(images, list): - for image_key, image_url in images.items(): - mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) - if not mobj: - continue - thumbnails.append({ - 'url': image_url, - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'average_rating': average_rating, - 'age_limit': age_limit, - 'genre': genre, - 'creator': creator, - 'artist': artist, - 'release_year': release_year, - 'series': series, - 'episode': episode, - 'season_number': season_number, - 'episode_number': episode_number, - 'thumbnails': thumbnails, - 'subtitles': subtitles, - 'formats': formats, - } diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py deleted file mode 100644 index cdb29fcee7..0000000000 --- a/yt_dlp/extractor/cwtv.py +++ /dev/null @@ -1,180 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - parse_age_limit, - parse_iso8601, - parse_qs, - smuggle_url, - str_or_none, - update_url_query, -) -from ..utils.traversal import traverse_obj - - -class CWTVIE(InfoExtractor): - IE_NAME = 'cwtv' - _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' - _TESTS = [{ - 'url': 
'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b', - 'info_dict': { - 'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b', - 'ext': 'mp4', - 'title': 'A Stitch in Time', - 'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+', - 'thumbnail': r're:https?://.+\.jpe?g', - 'duration': 2632, - 'timestamp': 1736928000, - 'uploader': 'CWTV', - 'chapters': 'count:5', - 'series': 'Continuum', - 'season_number': 1, - 'episode_number': 1, - 'age_limit': 14, - 'upload_date': '20250115', - 'season': 'Season 1', - 'episode': 'Episode 1', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', - 'info_dict': { - 'id': '6b15e985-9345-4f60-baf8-56e96be57c63', - 'ext': 'mp4', - 'title': 'Legends of Yesterday', - 'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+', - 'duration': 2665, - 'series': 'Arrow', - 'season_number': 4, - 'season': '4', - 'episode_number': 8, - 'upload_date': '20151203', - 'timestamp': 1449122100, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'redirect to http://cwtv.com/shows/arrow/', - }, { - 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088', - 'info_dict': { - 'id': '24282b12-ead2-42f2-95ad-26770c2c6088', - 'ext': 'mp4', - 'title': 'Jeff Davis 4', - 'description': 'Jeff Davis is back to make you laugh.', - 'duration': 1263, - 'series': 'Whose Line Is It Anyway?', - 'season_number': 11, - 'episode_number': 20, - 'upload_date': '20151006', - 'timestamp': 1444107300, - 'age_limit': 14, - 'uploader': 'CWTV', - 'thumbnail': r're:https?://.+\.jpe?g', - 'chapters': 'count:4', - 'episode': 'Episode 20', - 'season': 'Season 11', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 
'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', - 'only_matching': True, - }, { - 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', - 'only_matching': True, - }, { - 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', - 'only_matching': True, - }, { - 'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}', video_id) - if traverse_obj(data, 'result') != 'ok': - raise ExtractorError(traverse_obj(data, (('error_msg', 'msg'), {str}, any)), expected=True) - video_data = data['video'] - title = video_data['title'] - mpx_url = update_url_query( - video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}', - {'formats': 'M3U+none'}) - - season = str_or_none(video_data.get('season')) - episode = str_or_none(video_data.get('episode')) - if episode and season: - episode = episode[len(season):] - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(mpx_url, {'force_smil_url': True}), - 'description': video_data.get('description_long'), - 'duration': int_or_none(video_data.get('duration_secs')), - 'series': video_data.get('series_name'), - 'season_number': int_or_none(season), - 'episode_number': int_or_none(episode), - 'timestamp': parse_iso8601(video_data.get('start_time')), - 'age_limit': parse_age_limit(video_data.get('rating')), - 'ie_key': 'ThePlatform', - 'thumbnail': video_data.get('large_thumbnail'), - } - - -class CWTVMovieIE(InfoExtractor): - IE_NAME = 'cwtv:movie' - _VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies' - _TESTS = [{ - 'url': 
'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane', - 'info_dict': { - 'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', - 'ext': 'mp4', - 'title': 'The Crush', - 'upload_date': '20241112', - 'description': 'md5:1549acd90dff4a8273acd7284458363e', - 'chapters': 'count:9', - 'timestamp': 1731398400, - 'age_limit': 16, - 'duration': 5337, - 'series': 'The Crush', - 'season': 'Season 1', - 'uploader': 'CWTV', - 'season_number': 1, - 'episode': 'Episode 1', - 'episode_number': 1, - 'thumbnail': r're:https?://.+\.jpe?g', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - _UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - app_url = ( - self._html_search_meta('al:ios:url', webpage, default=None) - or self._html_search_meta('al:android:url', webpage, default=None)) - video_id = ( - traverse_obj(parse_qs(app_url), ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0)) - or self._search_regex([ - rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})', - rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})', - ], webpage, 'video ID')) - - return self.url_result( - f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}', CWTVIE, video_id) diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py deleted file mode 100644 index 317f53b2bc..0000000000 --- a/yt_dlp/extractor/paramountplus.py +++ /dev/null @@ -1,201 +0,0 @@ -import itertools - -from .cbs import CBSBaseIE -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - url_or_none, -) - - -class ParamountPlusIE(CBSBaseIE): - _VALID_URL = r'''(?x) - (?: - paramountplus:| - https?://(?:www\.)?(?: - paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/ - )(?P[\w-]+))''' - - # All tests are blocked outside US - _TESTS = [{ - 
'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/', - 'info_dict': { - 'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k', - 'ext': 'mp4', - 'title': 'CatDog - Climb Every CatDog/The Canine Mutiny', - 'description': 'md5:7ac835000645a69933df226940e3c859', - 'duration': 1426, - 'timestamp': 920264400, - 'upload_date': '19990301', - 'uploader': 'CBSI-NEW', - 'episode_number': 5, - 'thumbnail': r're:https?://.+\.jpg$', - 'season': 'Season 2', - 'chapters': 'count:3', - 'episode': 'Episode 5', - 'season_number': 2, - 'series': 'CatDog', - }, - 'params': { - 'skip_download': 'm3u8', - }, - }, { - 'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/', - 'info_dict': { - 'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd', - 'ext': 'mp4', - 'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)', - 'description': 'md5:f4adcea3e8b106192022e121f1565bae', - 'duration': 2506, - 'timestamp': 1627063200, - 'upload_date': '20210723', - 'uploader': 'CBSI-NEW', - 'episode_number': 81, - 'thumbnail': r're:https?://.+\.jpg$', - 'season': 'Season 2', - 'chapters': 'count:4', - 'episode': 'Episode 81', - 'season_number': 2, - 'series': 'Tooning Out The News', - }, - 'params': { - 'skip_download': 'm3u8', - }, - }, { - 'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/', - 'info_dict': { - 'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC', - 'ext': 'mp4', - 'title': 'Daddy\'s Home', - 'upload_date': '20151225', - 'description': 'md5:9a6300c504d5e12000e8707f20c54745', - 'uploader': 'CBSI-NEW', - 'timestamp': 1451030400, - 'thumbnail': r're:https?://.+\.jpg$', - 'chapters': 'count:0', - 'duration': 5761, - 'series': 'Paramount+ Movies', - }, - 'params': { - 'skip_download': 'm3u8', - }, - 'skip': 'DRM', - }, { - 'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/', - 'info_dict': { - 'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc', 
- 'ext': 'mp4', - 'uploader': 'CBSI-NEW', - 'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911', - 'timestamp': 1577865600, - 'title': 'Sonic the Hedgehog', - 'upload_date': '20200101', - 'thumbnail': r're:https?://.+\.jpg$', - 'chapters': 'count:0', - 'duration': 5932, - 'series': 'Paramount+ Movies', - }, - 'params': { - 'skip_download': 'm3u8', - }, - 'skip': 'DRM', - }, { - 'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/', - 'only_matching': True, - }, { - 'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/', - 'only_matching': True, - }, { - 'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/', - 'only_matching': True, - }, { - 'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/', - 'only_matching': True, - }] - - def _extract_video_info(self, content_id, mpx_acc=2198311517): - items_data = self._download_json( - f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json', - content_id, query={ - 'locale': 'en-us', - 'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=', - }, headers=self.geo_verification_headers()) - - asset_types = { - item.get('assetType'): { - 'format': 'SMIL', - 'formats': 'M3U+none,MPEG4', # '+none' specifies ProtectionScheme (no DRM) - } for item in items_data['itemList'] - } - item = items_data['itemList'][-1] - - info, error = {}, None - metadata = { - 'title': item.get('title'), - 'series': item.get('seriesTitle'), - 'season_number': int_or_none(item.get('seasonNum')), - 'episode_number': int_or_none(item.get('episodeNum')), - 'duration': int_or_none(item.get('duration')), - 'thumbnail': url_or_none(item.get('thumbnail')), - } - try: - info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata) - except ExtractorError as e: - error = e - - # Check for DRM formats to 
give appropriate error - if not info.get('formats'): - for query in asset_types.values(): - query['formats'] = 'MPEG-DASH,M3U,MPEG4' # allows DRM formats - - try: - drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata) - except ExtractorError: - if error: - raise error from None - raise - if drm_info['formats']: - self.report_drm(content_id) - elif error: - raise error - - return info - - -class ParamountPlusSeriesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P[a-zA-Z0-9-_]+)/?(?:[#?]|$)' - _TESTS = [{ - 'url': 'https://www.paramountplus.com/shows/drake-josh', - 'playlist_mincount': 50, - 'info_dict': { - 'id': 'drake-josh', - }, - }, { - 'url': 'https://www.paramountplus.com/shows/hawaii_five_0/', - 'playlist_mincount': 240, - 'info_dict': { - 'id': 'hawaii_five_0', - }, - }, { - 'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/', - 'playlist_mincount': 248, - 'info_dict': { - 'id': 'spongebob-squarepants', - }, - }] - - def _entries(self, show_name): - for page in itertools.count(): - show_json = self._download_json( - f'https://www.paramountplus.com/shows/{show_name}/xhr/episodes/page/{page}/size/50/xs/0/season/0', show_name) - if not show_json.get('success'): - return - for episode in show_json['result']['data']: - yield self.url_result( - 'https://www.paramountplus.com{}'.format(episode['url']), - ie=ParamountPlusIE.ie_key(), video_id=episode['content_id']) - - def _real_extract(self, url): - show_name = self._match_id(url) - return self.playlist_result(self._entries(show_name), playlist_id=show_name) diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py deleted file mode 100644 index 6037a35116..0000000000 --- a/yt_dlp/extractor/sixplay.py +++ /dev/null @@ -1,119 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - parse_qs, - qualities, - try_get, -) - - -class SixPlayIE(InfoExtractor): - IE_NAME = 
'6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P[0-9]+)' - _TESTS = [{ - 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', - 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', - 'info_dict': { - 'id': '12041051', - 'ext': 'mp4', - 'title': 'Le but qui a marqué l\'histoire du football français !', - 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851', - }, - }, { - 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869', - 'only_matching': True, - }, { - 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', - 'only_matching': True, - }, { - 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, video_id = self._match_valid_url(url).groups() - service, consumer_name = { - '6play.fr': ('6play', 'm6web'), - 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), - 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), - 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), - }.get(domain, ('6play', 'm6web')) - - data = self._download_json( - f'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/{service}/videos/clip_{video_id}', - video_id, headers={ - 'x-customer-name': consumer_name, - }, query={ - 'csa': 5, - 'with': 'clips', - }) - - clip_data = data['clips'][0] - title = clip_data['title'] - - urls = [] - quality_key = qualities(['lq', 'sd', 'hq', 'hd']) - formats = [] - subtitles = {} - assets = clip_data.get('assets') or [] - for asset in assets: - asset_url = asset.get('full_physical_path') - protocol = asset.get('protocol') - if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls: - continue - urls.append(asset_url) - container = 
asset.get('video_container') - ext = determine_ext(asset_url) - if protocol == 'http_subtitle' or ext == 'vtt': - subtitles.setdefault('fr', []).append({'url': asset_url}) - continue - if container == 'm3u8' or ext == 'm3u8': - if protocol == 'usp': - if parse_qs(asset_url).get('token', [None])[0]: - urlh = self._request_webpage( - asset_url, video_id, fatal=False, - headers=self.geo_verification_headers()) - if not urlh: - continue - asset_url = urlh.url - asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/') - for i in range(3, 0, -1): - asset_url = asset_url.replace('_sd1/', f'_sd{i}/') - m3u8_formats = self._extract_m3u8_formats( - asset_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - formats.extend(self._extract_mpd_formats( - asset_url.replace('.m3u8', '.mpd'), - video_id, mpd_id='dash', fatal=False)) - if m3u8_formats: - break - else: - formats.extend(self._extract_m3u8_formats( - asset_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif container == 'mp4' or ext == 'mp4': - quality = asset.get('video_quality') - formats.append({ - 'url': asset_url, - 'format_id': quality, - 'quality': quality_key(quality), - 'ext': ext, - }) - - def get(getter): - for src in (data, clip_data): - v = try_get(src, getter, str) - if v: - return v - - return { - 'id': video_id, - 'title': title, - 'description': get(lambda x: x['description']), - 'duration': int_or_none(clip_data.get('duration')), - 'series': get(lambda x: x['program']['title']), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py deleted file mode 100644 index de67a61148..0000000000 --- a/yt_dlp/extractor/spotify.py +++ /dev/null @@ -1,167 +0,0 @@ -import functools -import json -import re - -from .common import InfoExtractor -from ..utils import ( - OnDemandPagedList, - clean_podcast_url, - float_or_none, - int_or_none, - strip_or_none, - traverse_obj, - 
try_get, - unified_strdate, -) - - -class SpotifyBaseIE(InfoExtractor): - _WORKING = False - _ACCESS_TOKEN = None - _OPERATION_HASHES = { - 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf', - 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', - 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', - } - _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P[^/?&#]+)' - _EMBED_REGEX = [r']+src="(?Phttps?://open\.spotify.com/embed/[^"]+)"'] - - def _real_initialize(self): - self._ACCESS_TOKEN = self._download_json( - 'https://open.spotify.com/get_access_token', None)['accessToken'] - - def _call_api(self, operation, video_id, variables, **kwargs): - return self._download_json( - 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={ - 'operationName': 'query' + operation, - 'variables': json.dumps(variables), - 'extensions': json.dumps({ - 'persistedQuery': { - 'sha256Hash': self._OPERATION_HASHES[operation], - }, - }), - }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN}, - **kwargs)['data'] - - def _extract_episode(self, episode, series): - episode_id = episode['id'] - title = episode['name'].strip() - - formats = [] - audio_preview = episode.get('audioPreview') or {} - audio_preview_url = audio_preview.get('url') - if audio_preview_url: - f = { - 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'), - 'vcodec': 'none', - } - audio_preview_format = audio_preview.get('format') - if audio_preview_format: - f['format_id'] = audio_preview_format - mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format) - if mobj: - f.update({ - 'abr': int(mobj.group(2)), - 'ext': mobj.group(1).lower(), - }) - formats.append(f) - - for item in (try_get(episode, lambda x: x['audio']['items']) or []): - item_url = item.get('url') - if not (item_url and item.get('externallyHosted')): - 
continue - formats.append({ - 'url': clean_podcast_url(item_url), - 'vcodec': 'none', - }) - - thumbnails = [] - for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []): - source_url = source.get('url') - if not source_url: - continue - thumbnails.append({ - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - }) - - return { - 'id': episode_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': strip_or_none(episode.get('description')), - 'duration': float_or_none(try_get( - episode, lambda x: x['duration']['totalMilliseconds']), 1000), - 'release_date': unified_strdate(try_get( - episode, lambda x: x['releaseDate']['isoString'])), - 'series': series, - } - - -class SpotifyIE(SpotifyBaseIE): - IE_NAME = 'spotify' - IE_DESC = 'Spotify episodes' - _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' - _TESTS = [{ - 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', - 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', - 'info_dict': { - 'id': '4Z7GAJ50bgctf6uclHlWKo', - 'ext': 'mp3', - 'title': 'From the archive: Why time management is ruining our lives', - 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935', - 'duration': 2083.605, - 'release_date': '20201217', - 'series': "The Guardian's Audio Long Reads", - }, - }, { - 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA', - 'only_matching': True, - }] - - def _real_extract(self, url): - episode_id = self._match_id(url) - episode = self._call_api('Episode', episode_id, { - 'uri': 'spotify:episode:' + episode_id, - })['episode'] - return self._extract_episode( - episode, try_get(episode, lambda x: x['podcast']['name'])) - - -class SpotifyShowIE(SpotifyBaseIE): - IE_NAME = 'spotify:show' - IE_DESC = 'Spotify shows' - _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show' - _TEST = { - 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M', - 
'info_dict': { - 'id': '4PM9Ke6l66IRNpottHKV9M', - 'title': 'The Story from the Guardian', - 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories', - }, - 'playlist_mincount': 36, - } - _PER_PAGE = 100 - - def _fetch_page(self, show_id, page=0): - return self._call_api('ShowEpisodes', show_id, { - 'limit': 100, - 'offset': page * self._PER_PAGE, - 'uri': f'spotify:show:{show_id}', - }, note=f'Downloading page {page + 1} JSON metadata')['podcast'] - - def _real_extract(self, url): - show_id = self._match_id(url) - first_page = self._fetch_page(show_id) - - def _entries(page): - podcast = self._fetch_page(show_id, page) if page else first_page - yield from map( - functools.partial(self._extract_episode, series=podcast.get('name')), - traverse_obj(podcast, ('episodes', 'items', ..., 'episode'))) - - return self.playlist_result( - OnDemandPagedList(_entries, self._PER_PAGE), - show_id, first_page.get('name'), first_page.get('description')) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 05ae4dd18a..4857156913 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -30,13 +30,13 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'play\.hbomax\.com', r'channel(?:4|5)\.com', r'peacocktv\.com', - r'(?:[\w\.]+\.)?disneyplus\.com', - r'open\.spotify\.com/(?:track|playlist|album|artist)', + r'(?:[\w.]+\.)?disneyplus\.com', + r'open\.spotify\.com', r'tvnz\.co\.nz', r'oneplus\.ch', r'artstation\.com/learning/courses', r'philo\.com', - r'(?:[\w\.]+\.)?mech-plus\.com', + r'(?:[\w.]+\.)?mech-plus\.com', r'aha\.video', r'mubi\.com', r'vootkids\.com', @@ -57,6 +57,14 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'ctv\.ca', r'noovo\.ca', r'tsn\.ca', + r'paramountplus\.com', + r'(?:m\.)?(?:sony)?crackle\.com', + r'cw(?:tv(?:pr)?|seed)\.com', + r'6play\.fr', + r'rtlplay\.be', + r'play\.rtl\.hr', + r'rtlmost\.hu', + r'plus\.rtl\.de(?!/podcast/)', ) _TESTS = 
[{ @@ -78,10 +86,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): 'url': r'https://www.disneyplus.com', 'only_matching': True, }, { - 'url': 'https://open.spotify.com/artist/', - 'only_matching': True, - }, { - 'url': 'https://open.spotify.com/track/', + 'url': 'https://open.spotify.com', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/4122 @@ -184,6 +189,39 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747', 'only_matching': True, + }, { + 'url': 'https://www.paramountplus.com', + 'only_matching': True, + }, { + 'url': 'https://www.crackle.com', + 'only_matching': True, + }, { + 'url': 'https://m.sonycrackle.com', + 'only_matching': True, + }, { + 'url': 'https://www.cwtv.com', + 'only_matching': True, + }, { + 'url': 'https://www.cwseed.com', + 'only_matching': True, + }, { + 'url': 'https://cwtvpr.com', + 'only_matching': True, + }, { + 'url': 'https://www.6play.fr', + 'only_matching': True, + }, { + 'url': 'https://www.rtlplay.be', + 'only_matching': True, + }, { + 'url': 'https://play.rtl.hr', + 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu', + 'only_matching': True, + }, { + 'url': 'https://plus.rtl.de/video-tv/', + 'only_matching': True, }] def _real_extract(self, url): @@ -222,6 +260,7 @@ class KnownPiracyIE(UnsupportedInfoExtractor): r'91porn\.com', r'einthusan\.(?:tv|com|ca)', r'yourupload\.com', + r'xanimu\.com', ) _TESTS = [{ diff --git a/yt_dlp/extractor/xanimu.py b/yt_dlp/extractor/xanimu.py deleted file mode 100644 index b489358779..0000000000 --- a/yt_dlp/extractor/xanimu.py +++ /dev/null @@ -1,52 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class XanimuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xanimu\.com/(?P<id>[^/]+)/?' 
- _TESTS = [{ - 'url': 'https://xanimu.com/51944-the-princess-the-frog-hentai/', - 'md5': '899b88091d753d92dad4cb63bbf357a7', - 'info_dict': { - 'id': '51944-the-princess-the-frog-hentai', - 'ext': 'mp4', - 'title': 'The Princess + The Frog Hentai', - 'thumbnail': 'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg', - 'description': r're:^Enjoy The Princess \+ The Frog Hentai', - 'duration': 207.0, - 'age_limit': 18, - }, - }, { - 'url': 'https://xanimu.com/huge-expansion/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - formats = [] - for format_id in ['videoHigh', 'videoLow']: - format_url = self._search_json( - rf'var\s+{re.escape(format_id)}\s*=', webpage, format_id, - video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]') - if format_url: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'quality': -2 if format_id.endswith('Low') else None, - }) - - return { - 'id': video_id, - 'formats': formats, - 'title': self._search_regex(r'[\'"]headline[\'"]:\s*[\'"]([^"]+)[\'"]', webpage, - 'title', default=None) or self._html_extract_title(webpage), - 'thumbnail': self._html_search_meta('thumbnailUrl', webpage, default=None), - 'description': self._html_search_meta('description', webpage, default=None), - 'duration': int_or_none(self._search_regex(r'duration:\s*[\'"]([^\'"]+?)[\'"]', - webpage, 'duration', fatal=False)), - 'age_limit': 18, - }