From 6e5bee418bc108565108153fd745c8e7a59f16dd Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 12 Jul 2025 13:44:27 +1200 Subject: [PATCH 01/81] [ie/youtube] Ensure context params are consistent for web clients (#13701) Authored by: coletdjnz --- yt_dlp/extractor/youtube/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 7d9cbf8ee4..0a9b510c7d 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -105,6 +105,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'SUPPORTS_COOKIES': True, **WEB_PO_TOKEN_POLICIES, + 'PLAYER_PARAMS': '8AEB', }, # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats 'web_safari': { From a5d697f62d8be78ffd472acb2f52c8bc32833003 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 14:23:22 -0500 Subject: [PATCH 02/81] [ie/vimeo] Fix extractor (#13692) Closes #13180, Closes #13689 Authored by: bashonly --- README.md | 4 + yt_dlp/extractor/vimeo.py | 277 ++++++++++++++++++++++++++++++++------ 2 files changed, 237 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index c1a9356923..925ebd8c5b 100644 --- a/README.md +++ b/README.md @@ -1901,6 +1901,10 @@ The following extractors use this feature: #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) +#### vimeo +* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index b268fad56d..fdd42ec94f 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -21,6 +21,7 @@ from ..utils import ( js_to_json, jwt_decode_hs256, merge_dicts, + mimetype2ext, parse_filesize, parse_iso8601, parse_qs, @@ -28,9 +29,11 @@ from ..utils import ( smuggle_url, str_or_none, traverse_obj, + try_call, try_get, unified_timestamp, unsmuggle_url, + url_basename, url_or_none, urlencode_postdata, urlhandle_detect_ext, @@ -45,14 +48,56 @@ class VimeoBaseInfoExtractor(InfoExtractor): _REFERER_HINT = ( 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==' - _IOS_CLIENT_HEADERS = { + + _DEFAULT_CLIENT = 'android' + _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', 'Accept-Language': 'en', - 'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0', } - _IOS_OAUTH_CACHE_KEY = 'oauth-token-ios' - _ios_oauth_token = None + _CLIENT_CONFIGS = { + 'android': { + 'CACHE_KEY': 'oauth-token-android', + 'CACHE_ONLY': False, + 'VIEWER_JWT': False, + 'REQUIRES_AUTH': False, + 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', + 'USER_AGENT': 'com.vimeo.android.videoapp (OnePlus, ONEPLUS A6003, OnePlus, Android 14/34 Version 11.8.1) Kotlin VimeoNetworking/3.12.0', + 'VIDEOS_FIELDS': ( + 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', 'width', + 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', 'content_rating', + 'content_rating_class', 'rating_mod_locked', 'license', 'privacy', 'pictures', 'tags', 'stats', + 'categories', 'uploader', 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', + 'resource_key', 'badge', 'upload', 'transcode', 'is_playable', 'has_audio', + ), + }, + 'ios': { + 'CACHE_KEY': 'oauth-token-ios', + 'CACHE_ONLY': True, + 'VIEWER_JWT': False, + 'REQUIRES_AUTH': False, + 'AUTH': 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==', + 'USER_AGENT': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0', + 'VIDEOS_FIELDS': ( + 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', + 'width', 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', + 'content_rating', 'content_rating_class', 'rating_mod_locked', 'license', 'config_url', + 'embed_player_config_url', 'privacy', 'pictures', 'tags', 'stats', 'categories', 'uploader', + 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', 'resource_key', 'badge', + 'upload', 'transcode', 'is_playable', 'has_audio', + ), + }, + 'web': { + 'VIEWER_JWT': True, + 'REQUIRES_AUTH': True, + 'USER_AGENT': None, + 'VIDEOS_FIELDS': ( + 'config_url', 'created_time', 'description', 'license', + 'metadata.connections.comments.total', 'metadata.connections.likes.total', + 'release_time', 'stats.plays', + ), + }, + } + _oauth_tokens = {} _viewer_info = None @staticmethod @@ -105,8 +150,8 @@ class VimeoBaseInfoExtractor(InfoExtractor): raise ExtractorError('Unable to log in') def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'): - self._raise_login_required() + if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vimeo'): + self.raise_login_required() def _get_video_password(self): password = self.get_param('videopassword') @@ -277,52 +322,88 @@ class VimeoBaseInfoExtractor(InfoExtractor): 
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'), } - def _fetch_oauth_token(self): - if not self._ios_oauth_token: - self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY) + def _fetch_oauth_token(self, client): + client_config = self._CLIENT_CONFIGS[client] + + if client_config['VIEWER_JWT']: + return f'jwt {self._fetch_viewer_info()["jwt"]}' - if not self._ios_oauth_token: - self._ios_oauth_token = self._download_json( + cache_key = client_config['CACHE_KEY'] + + if not self._oauth_tokens.get(cache_key): + self._oauth_tokens[cache_key] = self.cache.load(self._NETRC_MACHINE, cache_key) + + if not self._oauth_tokens.get(cache_key): + if client_config['CACHE_ONLY']: + raise ExtractorError( + f'The {client} client is unable to fetch new OAuth tokens ' + f'and is only intended for use with previously cached tokens', expected=True) + + self._oauth_tokens[cache_key] = self._download_json( 'https://api.vimeo.com/oauth/authorize/client', None, - 'Fetching OAuth token', 'Failed to fetch OAuth token', + f'Fetching {client} OAuth token', f'Failed to fetch {client} OAuth token', headers={ - 'Authorization': f'Basic {self._IOS_CLIENT_AUTH}', - **self._IOS_CLIENT_HEADERS, + 'Authorization': f'Basic {client_config["AUTH"]}', + 'User-Agent': client_config['USER_AGENT'], + **self._CLIENT_HEADERS, }, data=urlencode_postdata({ 'grant_type': 'client_credentials', - 'scope': 'private public create edit delete interact upload purchased stats', + 'scope': 'private public create edit delete interact upload purchased stats video_files', }, quote_via=urllib.parse.quote))['access_token'] - self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token) + self.cache.store(self._NETRC_MACHINE, cache_key, self._oauth_tokens[cache_key]) + + return f'Bearer {self._oauth_tokens[cache_key]}' - return self._ios_oauth_token + def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): + client = force_client or self._configuration_arg('client', [self._DEFAULT_CLIENT], ie_key=VimeoIE)[0] + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. 
' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + + client_config = self._CLIENT_CONFIGS[client] + if client_config['REQUIRES_AUTH'] and not self._get_cookies('https://vimeo.com').get('vimeo'): + self.raise_login_required(f'The {client} client requires authentication') - def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs): return self._download_json( - join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'), - video_id, 'Downloading API JSON', headers={ - 'Authorization': f'Bearer {self._fetch_oauth_token()}', - **self._IOS_CLIENT_HEADERS, - }, query={ - 'fields': ','.join(( - 'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play', - 'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays', - 'metadata.connections.comments.total', 'metadata.connections.likes.total')), + join_nonempty( + 'https://api.vimeo.com/videos', + join_nonempty(video_id, unlisted_hash, delim=':'), + path, delim='/'), + video_id, f'Downloading {client} API JSON', f'Unable to download {client} API JSON', + headers=filter_dict({ + 'Authorization': self._fetch_oauth_token(client), + 'User-Agent': client_config['USER_AGENT'], + **self._CLIENT_HEADERS, + }), query={ + 'fields': ','.join(client_config['VIDEOS_FIELDS']), + **(query or {}), }, **kwargs) - def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None): + def _extract_original_format(self, url, video_id, unlisted_hash=None): # Original/source formats are only available when logged in if not self._get_cookies('https://vimeo.com/').get('vimeo'): - return + return None - query = {'action': 'load_download_config'} - if unlisted_hash: - query['unlisted_hash'] = unlisted_hash - download_data = self._download_json( - url, video_id, 'Loading download config JSON', fatal=False, - query=query, headers={'X-Requested-With': 'XMLHttpRequest'}, - expected_status=(403, 404)) or {} - source_file = download_data.get('source_file') - download_url = try_get(source_file, lambda x: x['download_url']) + policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0] + if policy == 'never': + return None + + try: + download_data = self._download_json( + url, video_id, 'Loading download config JSON', query=filter_dict({ + 'action': 'load_download_config', + 'unlisted_hash': unlisted_hash, + }), headers={ + 'Accept': 'application/json', + 'X-Requested-With': 'XMLHttpRequest', + }) + except ExtractorError as error: + self.write_debug(f'Unable to load download config JSON: {error.cause}') + download_data = None + + source_file = traverse_obj(download_data, ('source_file', {dict})) or {} + download_url = traverse_obj(source_file, ('download_url', {url_or_none})) if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, f'{source_name} video'): @@ -340,8 +421,27 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'quality': 1, } - original_response = api_data or self._call_videos_api( - video_id, unlisted_hash, fatal=False, expected_status=(403, 404)) + # Most web client API requests are subject to rate-limiting (429) when logged-in. 
+ # Requesting only the 'privacy' field is NOT rate-limited, + # so first we should check if video even has 'download' formats available + try: + privacy_info = self._call_videos_api( + video_id, unlisted_hash, force_client='web', query={'fields': 'privacy'}) + except ExtractorError as error: + self.write_debug(f'Unable to download privacy info: {error.cause}') + return None + + if not traverse_obj(privacy_info, ('privacy', 'download', {bool})): + msg = f'{video_id}: Vimeo says this video is not downloadable' + if policy != 'always': + self.write_debug( + f'{msg}, so yt-dlp is not attempting to extract the original/source format. ' + f'To try anyways, use --extractor-args "vimeo:original_format_policy=always"') + return None + self.write_debug(f'{msg}; attempting to extract original/source format anyways') + + original_response = self._call_videos_api( + video_id, unlisted_hash, force_client='web', query={'fields': 'download'}, fatal=False) for download_data in traverse_obj(original_response, ('download', ..., {dict})): download_url = download_data.get('link') if not download_url or download_data.get('quality') != 'source': @@ -919,6 +1019,92 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('Wrong video password', expected=True) return checked + def _get_subtitles(self, video_id, unlisted_hash): + subs = {} + text_tracks = self._call_videos_api( + video_id, unlisted_hash, path='texttracks', query={ + 'include_transcript': 'true', + 'fields': ','.join(( + 'active', 'display_language', 'id', 'language', 'link', 'name', 'type', 'uri', + )), + }, fatal=False) + for tt in traverse_obj(text_tracks, ('data', lambda _, v: url_or_none(v['link']))): + subs.setdefault(tt.get('language'), []).append({ + 'url': tt['link'], + 'ext': 'vtt', + 'name': tt.get('display_language'), + }) + return subs + + def _parse_api_response(self, video, video_id, unlisted_hash=None): + formats, subtitles = [], {} + seen_urls = set() + duration = traverse_obj(video, ('duration', {int_or_none})) + + for file in traverse_obj(video, ( + (('play', (None, 'progressive')), 'files', 'download'), lambda _, v: url_or_none(v['link']), + )): + format_url = file['link'] + if format_url in seen_urls: + continue + seen_urls.add(format_url) + quality = file.get('quality') + ext = determine_ext(format_url) + if quality == 'hls' or ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + format_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + elif quality == 'dash' or ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles( + format_url, video_id, mpd_id='dash', fatal=False) + for fmt in fmts: + fmt['format_id'] = join_nonempty( + *fmt['format_id'].split('-', 2)[:2], int_or_none(fmt.get('tbr'))) + else: + fmt = traverse_obj(file, { + 'ext': ('type', {mimetype2ext(default='mp4')}), + 'vcodec': ('codec', {str.lower}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'filesize': ('size', {int_or_none}), + 'fps': ('fps', {int_or_none}), + }) + fmt.update({ + 'url': format_url, + 'format_id': join_nonempty( + 'http', traverse_obj(file, 'public_name', 'rendition'), quality), + 'tbr': try_call(lambda: fmt['filesize'] * 8 / duration / 1024), + }) + formats.append(fmt) + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + if traverse_obj(video, ('metadata', 'connections', 'texttracks', 'total', {int})): + self._merge_subtitles(self.extract_subtitles(video_id, unlisted_hash), target=subtitles) + + return { + **traverse_obj(video, { + 'title': 
('name', {str}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'link', {url_basename}), + 'uploader_url': ('user', 'link', {url_or_none}), + 'release_timestamp': ('live', 'scheduled_start_time', {int_or_none}), + 'thumbnails': ('pictures', 'sizes', lambda _, v: url_or_none(v['link']), { + 'url': 'link', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }), + 'id': video_id, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'live_status': { + 'streaming': 'is_live', + 'done': 'was_live', + }.get(traverse_obj(video, ('live', 'status', {str}))), + } + def _extract_from_api(self, video_id, unlisted_hash=None): for retry in (False, True): try: @@ -934,10 +1120,13 @@ class VimeoIE(VimeoBaseInfoExtractor): continue raise - info = self._parse_config(self._download_json( - video['config_url'], video_id), video_id) + if config_url := traverse_obj(video, ('config_url', {url_or_none})): + info = self._parse_config(self._download_json(config_url, video_id), video_id) + else: + info = self._parse_api_response(video, video_id, unlisted_hash) + source_format = self._extract_original_format( - f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video) + f'https://vimeo.com/{video_id}', video_id, unlisted_hash) if source_format: info['formats'].append(source_format) From 3ae61e0f313dd03a09060abc7a212775c3717818 Mon Sep 17 00:00:00 2001 From: Lyuben Ivanov Date: Sat, 12 Jul 2025 22:56:11 +0300 Subject: [PATCH 03/81] [ie/BTVPlus] Add extractor (#13541) Authored by: bubo --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/btvplus.py | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 yt_dlp/extractor/btvplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 84da570b0a..804536cce7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -309,6 +309,7 @@ from .brilliantpala import ( BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) +from .btvplus import BTVPlusIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .bunnycdn import BunnyCdnIE diff --git a/yt_dlp/extractor/btvplus.py b/yt_dlp/extractor/btvplus.py new file mode 100644 index 0000000000..531ace1471 --- /dev/null +++ b/yt_dlp/extractor/btvplus.py @@ -0,0 +1,73 @@ +from .common import InfoExtractor +from ..utils import ( + bug_reports_message, + clean_html, + get_element_by_class, + js_to_json, + mimetype2ext, + strip_or_none, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class BTVPlusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P\d+)' + _TESTS = [{ + 'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g', + 'info_dict': { + 'ext': 'mp4', + 'id': '67271', + 'title': 'bTV Репортерите - 12.07.2025 г.', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55', + 'info_dict': { + 'ext': 'mp4', + 'id': '66942', + 'title': 'Плен - сезон 2, епизод 55', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) 
+ + player_url = self._search_regex( + r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]', + webpage, 'player URL') + + player_config = self._download_json( + urljoin('https://btvplus.bg', player_url), video_id)['config'] + + videojs_data = self._search_json( + r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data', + video_id, transform_source=js_to_json) + formats = [] + subtitles = {} + for src in traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src']))): + ext = mimetype2ext(src.get('type')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + self.report_warning(f'Unknown format type {ext}{bug_reports_message()}') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': ( + strip_or_none(self._og_search_title(webpage, default=None)) + or clean_html(get_element_by_class('product-title', webpage))), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + } From 5245231e4a39ecd5595d4337d46d85e150e2430a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 17:12:46 -0500 Subject: [PATCH 04/81] [ie] Add `_search_nextjs_v13_data` helper (#13398) * Fixes FranceTVSiteIE livestream extraction * Fixes GoPlayIE metadata extraction Authored by: bashonly --- test/test_InfoExtractor.py | 26 +++++++++++++++++++ yt_dlp/extractor/common.py | 47 +++++++++++++++++++++++++++++++++++ yt_dlp/extractor/francetv.py | 48 +++++++++++++++++++----------------- yt_dlp/extractor/goplay.py | 44 +++++++++++---------------------- yt_dlp/extractor/ninenow.py | 21 ++++++---------- 5 files changed, 120 insertions(+), 66 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f7..7c3825f779 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1959,6 +1959,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nextjs_v13_data(self): + HTML = R''' + + + + + + + + ''' + EXPECTED = [{ + 'foo': 'bar', + }, { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, { + 'duplicated_field_name': {'x': 1}, + }, { + 'duplicated_field_name': {'y': 2}, + }] + self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), []) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), []) + def test_search_nuxt_json(self): HTML_TMPL = '' VALID_DATA = ''' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e806233..a3ff5a1c0b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1783,6 +1783,53 @@ class InfoExtractor: r']+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', video_id, end_pattern='', fatal=fatal, default=default, **kw) + def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): + """Parses Next.js app router flight data that was introduced in Next.js v13""" + nextjs_data = [] + if not fatal and not isinstance(webpage, str): + return nextjs_data + + def flatten(flight_data): + if not isinstance(flight_data, list): + return + if len(flight_data) 
== 4 and flight_data[0] == '$': + _, name, _, data = flight_data + if not isinstance(data, dict): + return + children = data.pop('children', None) + if data and isinstance(name, str) and name.startswith('$'): + # It is useful hydration JSON data + nextjs_data.append(data) + flatten(children) + return + for f in flight_data: + flatten(f) + + flight_text = '' + # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189 + for flight_segment in re.findall(r']*>self\.__next_f\.push\((\[.+?\])\)', webpage): + segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False) + # Some earlier versions of next.js "optimized" away this array structure; this is unsupported + # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761 + if not isinstance(segment, list) or len(segment) != 2: + self.write_debug( + f'{video_id}: Unsupported next.js flight data structure detected', only_once=True) + continue + # Only use the relevant payload type (1 == data) + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14 + payload_type, chunk = segment + if payload_type == 1: + flight_text += chunk + + for f in flight_text.splitlines(): + prefix, _, body = f.partition(':') + if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()): + # The body isn't necessarily valid JSON, so this should always be non-fatal + flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + + return nextjs_data + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 5c9f8e36dd..edf6708a03 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,4 +1,3 @@ -import json import re import urllib.parse @@ -19,7 +18,11 @@ from ..utils import ( unsmuggle_url, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import ( + find_element, + get_first, + traverse_obj, +) class FranceTVBaseInfoExtractor(InfoExtractor): @@ -258,7 +261,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba' + 'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1 'ext': 'mp4', 'title': '13h15, le dimanche... 
- Les mystères de Jésus', 'timestamp': 1502623500, @@ -269,7 +272,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'params': { 'skip_download': True, }, - 'add_ie': [FranceTVIE.ie_key()], + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', @@ -287,7 +290,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1441, }, - 'skip': 'No longer available', + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted livestream (workflow == 'token-akamai') 'url': 'https://www.france.tv/france-4/direct.html', @@ -308,6 +311,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'live_status': 'is_live', }, 'params': {'skip_download': 'livestream'}, + }, { + # Not geo-restricted + 'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html', + 'info_dict': { + 'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df', + 'ext': 'mp4', + 'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023', + 'duration': 1065, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1703147921, + 'upload_date': '20231221', + }, + 'params': {'skip_download': 'm3u8'}, }, { # france3 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', @@ -342,30 +358,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'only_matching': True, }] - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + nextjs_data = self._search_nextjs_v13_data(webpage, display_id) - nextjs_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children')) - - if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)): + if get_first(nextjs_data, ('isLive', {bool})): # For livestreams we need the id of the stream instead of the currently airing episode id - video_id = traverse_obj(nextjs_data, ( - ..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ..., - 'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)), - 'options', 'id', {str}, any)) + video_id = get_first(nextjs_data, ('options', 'id', {str})) else: - video_id = traverse_obj(nextjs_data, ( - ..., ..., ..., 'children', - lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path, - 'video', ('playerReplayId', 'siId'), {str}, any)) + video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str})) if not video_id: raise ExtractorError('Unable to extract video ID') diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index c654c757c6..2e959cead2 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -5,16 +5,11 @@ import hashlib import hmac import json import os -import re import urllib.parse from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - remove_end, - traverse_obj, -) +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import 
get_first, traverse_obj class GoPlayIE(InfoExtractor): @@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor): 'info_dict': { 'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'ext': 'mp4', - 'title': 'S22 - Aflevering 1', + 'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', 'series': 'De Slimste Mens ter Wereld', - 'episode': 'Episode 1', + 'episode': 'Wordt aangekondigd', 'season_number': 22, 'episode_number': 1, 'season': 'Season 22', @@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor): 'info_dict': { 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'ext': 'mp4', - 'title': 'S11 - Aflevering 1', + 'title': 'De Mol - S11 - Aflevering 1', 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}', 'episode': 'Episode 1', 'series': 'De Mol', @@ -75,21 +70,13 @@ class GoPlayIE(InfoExtractor): if not self._id_token: raise self.raise_login_required(method='password') - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - nextjs_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, ...)) - meta = traverse_obj(nextjs_data, ( - ..., ..., 'children', ..., ..., 'children', - lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any)) + nextjs_data = self._search_nextjs_v13_data(webpage, display_id) + meta = get_first(nextjs_data, ( + lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path)) video_id = meta['uuid'] info_dict = traverse_obj(meta, { @@ -98,19 +85,18 @@ class GoPlayIE(InfoExtractor): }) if traverse_obj(meta, ('program', 'subtype')) != 'movie': - for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): - episode_data = traverse_obj( - season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) + for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})): + episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) if not episode_data: continue - episode_title = traverse_obj( - episode_data, 'contextualTitle', 'episodeTitle', expected_type=str) + season_number = traverse_obj(season_data, ('season', {int_or_none})) info_dict.update({ - 'title': episode_title or info_dict.get('title'), - 'series': remove_end(info_dict.get('title'), f' - {episode_title}'), - 'season_number': traverse_obj(season_data, ('season', {int_or_none})), + 'episode': traverse_obj(episode_data, ('episodeTitle', {str})), 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), + 'season_number': season_number, + 'series': self._search_regex( + fr'^(.+)? 
- S{season_number} - ', info_dict.get('title'), 'series', default=None), }) break diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 7b0cb77a74..2f3a4ed284 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,6 +1,3 @@ -import json -import re - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import ( @@ -11,7 +8,12 @@ from ..utils import ( str_or_none, url_or_none, ) -from ..utils.traversal import require, traverse_obj, value +from ..utils.traversal import ( + get_first, + require, + traverse_obj, + value, +) class NineNowIE(InfoExtractor): @@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor): }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}' - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id, video_type = self._match_valid_url(url).group('id', 'type') webpage = self._download_webpage(url, display_id) - common_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, - lambda _, v: v['payload'][video_type]['slug'] == display_id, - 'payload', any, {require('video data')})) + common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict})) if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})): self.report_drm(display_id) From b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 18:12:05 -0500 Subject: [PATCH 05/81] [ie] Rework `_search_nextjs_v13_data` helper (#13711) Fix 5245231e4a39ecd5595d4337d46d85e150e2430a Authored by: bashonly --- test/test_InfoExtractor.py | 31 ++++++++++++++++++------------- yt_dlp/extractor/common.py | 18 ++++++++++++------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7c3825f779..40dd05e136 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1969,21 +1969,26 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ''' - EXPECTED = [{ - 'foo': 'bar', - }, { - 'meta': { - 'dateCreated': 1730489700, - 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + EXPECTED = { + '18': { + 'foo': 'bar', }, - }, { - 'duplicated_field_name': {'x': 1}, - }, { - 'duplicated_field_name': {'y': 2}, - }] + '16': { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, + '19': { + 'duplicated_field_name': {'x': 1}, + }, + '20': { + 'duplicated_field_name': {'y': 2}, + }, + } self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) - self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), []) - self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), []) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {}) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {}) def test_search_nuxt_json(self): HTML_TMPL = '' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a3ff5a1c0b..d601e17514 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1785,7 +1785,7 @@ class 
InfoExtractor: def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): """Parses Next.js app router flight data that was introduced in Next.js v13""" - nextjs_data = [] + nextjs_data = {} if not fatal and not isinstance(webpage, str): return nextjs_data @@ -1797,9 +1797,9 @@ class InfoExtractor: if not isinstance(data, dict): return children = data.pop('children', None) - if data and isinstance(name, str) and name.startswith('$'): + if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name): # It is useful hydration JSON data - nextjs_data.append(data) + nextjs_data[name[2:]] = data flatten(children) return for f in flight_data: @@ -1823,10 +1823,16 @@ class InfoExtractor: flight_text += chunk for f in flight_text.splitlines(): - prefix, _, body = f.partition(':') - if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()): - # The body isn't necessarily valid JSON, so this should always be non-fatal + prefix, _, body = f.lstrip().partition(':') + if not re.fullmatch(r'[0-9a-f]+', prefix): + continue + # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal + if body.startswith('[') and body.endswith(']'): flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + elif body.startswith('{') and body.endswith('}'): + data = self._parse_json(body, video_id, fatal=False, errnote=False) + if data is not None: + nextjs_data[prefix] = data return nextjs_data From 0f33950c778331bf4803c76e8b0ba1862df93431 Mon Sep 17 00:00:00 2001 From: ShockedPlot7560 Date: Sun, 13 Jul 2025 01:35:51 +0200 Subject: [PATCH 06/81] [ie/mixlr] Add extractors (#13561) Authored by: ShockedPlot7560, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/mixlr.py | 134 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 yt_dlp/extractor/mixlr.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 804536cce7..18a3cac54b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1169,6 +1169,10 @@ from .mixcloud import ( MixcloudPlaylistIE, MixcloudUserIE, ) +from .mixlr import ( + MixlrIE, + MixlrRecoringIE, +) from .mlb import ( MLBIE, MLBTVIE, diff --git a/yt_dlp/extractor/mixlr.py b/yt_dlp/extractor/mixlr.py new file mode 100644 index 0000000000..53f3ffe6f8 --- /dev/null +++ b/yt_dlp/extractor/mixlr.py @@ -0,0 +1,134 @@ +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext +from ..utils.traversal import traverse_obj + + +class MixlrIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P[\w-]+)\.mixlr\.com/events/(?P\d+)' + _TESTS = [{ + 'url': 'https://suncity-104-9fm.mixlr.com/events/4387115', + 'info_dict': { + 'id': '4387115', + 'ext': 'mp3', + 'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'uploader': 'suncity-104-9fm', + 'like_count': int, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png', + 'timestamp': 1751943773, + 'upload_date': '20250708', + 'release_timestamp': 1751943764, + 'release_date': '20250708', + 'live_status': 'is_live', + }, + }, { + 'url': 'https://brcountdown.mixlr.com/events/4395480', + 'info_dict': { + 'id': '4395480', + 'ext': 'aac', + 'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'description': 
'md5:5cacd089723f7add3f266bd588315bb3', + 'uploader': 'brcountdown', + 'like_count': int, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg', + 'timestamp': 1752354007, + 'upload_date': '20250712', + 'release_timestamp': 1752354000, + 'release_date': '20250712', + 'live_status': 'is_live', + }, + }, { + 'url': 'https://www.brcountdown.mixlr.com/events/4395480', + 'only_matching': True, + }] + + def _real_extract(self, url): + username, event_id = self._match_valid_url(url).group('username', 'id') + + broadcast_info = self._download_json( + f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}', event_id) + + formats = [] + format_url = traverse_obj( + broadcast_info, ('included', 0, 'attributes', 'progressive_stream_url', {url_or_none})) + if format_url: + urlh = self._request_webpage( + HEADRequest(format_url), event_id, fatal=False, note='Checking stream') + if urlh and urlh.status == 200: + ext = urlhandle_detect_ext(urlh) + if ext == 'octet-stream': + self.report_warning( + 'The server did not return a valid file extension for the stream URL. ' + 'Assuming an mp3 stream; postprocessing may fail if this is incorrect') + ext = 'mp3' + formats.append({ + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + }) + + release_timestamp = traverse_obj( + broadcast_info, ('data', 'attributes', 'starts_at', {str})) + if not formats and release_timestamp: + self.raise_no_formats(f'This event will start at {release_timestamp}', expected=True) + + return { + 'id': event_id, + 'uploader': username, + 'formats': formats, + 'release_timestamp': parse_iso8601(release_timestamp), + **traverse_obj(broadcast_info, ('included', 0, 'attributes', { + 'title': ('title', {str}), + 'timestamp': ('started_at', {parse_iso8601}), + 'concurrent_view_count': ('concurrent_view_count', {int_or_none}), + 'like_count': ('heart_count', {int_or_none}), + 'is_live': ('live', {bool}), + })), + **traverse_obj(broadcast_info, ('data', 'attributes', { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('started_at', {parse_iso8601}), + 'concurrent_view_count': ('concurrent_view_count', {int_or_none}), + 'like_count': ('heart_count', {int_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'uploader_id': ('broadcaster_id', {str}), + })), + } + + +class MixlrRecoringIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P[\w-]+)\.mixlr\.com/recordings/(?P\d+)' + _TESTS = [{ + 'url': 'https://biblewayng.mixlr.com/recordings/2375193', + 'info_dict': { + 'id': '2375193', + 'ext': 'mp3', + 'title': "God's Jewels and Their Resting Place Bro. 
Adeniji", + 'description': 'Preached February 21, 2024 in the evening', + 'uploader_id': '8659190', + 'duration': 10968, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg', + 'timestamp': 1708544542, + 'upload_date': '20240221', + }, + }] + + def _real_extract(self, url): + username, recording_id = self._match_valid_url(url).group('username', 'id') + + recording_info = self._download_json( + f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}', recording_id) + + return { + 'id': recording_id, + **traverse_obj(recording_info, ('data', 'attributes', { + 'ext': ('file_format', {str}), + 'url': ('url', {url_or_none}), + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('created_at', {parse_iso8601}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'uploader_id': ('user_id', {str}), + })), + } From a6db1d297ab40cc346de24aacbeab93112b2f4e1 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 13 Jul 2025 16:09:39 -0500 Subject: [PATCH 07/81] [ie/vimeo] Handle age-restricted videos (#13719) Closes #13716 Authored by: bashonly --- yt_dlp/extractor/vimeo.py | 50 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index fdd42ec94f..7ffe89f227 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -50,6 +50,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'with the URL of the page that embeds this video.') _DEFAULT_CLIENT = 'android' + _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', 'Accept-Language': 'en', @@ -125,7 +126,14 @@ class VimeoBaseInfoExtractor(InfoExtractor): return self._viewer_info + @property + def _is_logged_in(self): + return 'vimeo' in self._get_cookies('https://vimeo.com') + def _perform_login(self, username, password): + if self._is_logged_in: + return + viewer = self._fetch_viewer_info() data = { 'action': 'login', @@ -150,7 +158,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): raise ExtractorError('Unable to log in') def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vimeo'): + if self._LOGIN_REQUIRED and not self._is_logged_in: self.raise_login_required() def _get_video_password(self): @@ -354,15 +362,22 @@ class VimeoBaseInfoExtractor(InfoExtractor): return f'Bearer {self._oauth_tokens[cache_key]}' - def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): - client = force_client or self._configuration_arg('client', [self._DEFAULT_CLIENT], ie_key=VimeoIE)[0] + def _get_requested_client(self): + default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + + client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] if client not in self._CLIENT_CONFIGS: raise ExtractorError( f'Unsupported API client "{client}" requested. 
' f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + return client + + def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): + client = force_client or self._get_requested_client() + client_config = self._CLIENT_CONFIGS[client] - if client_config['REQUIRES_AUTH'] and not self._get_cookies('https://vimeo.com').get('vimeo'): + if client_config['REQUIRES_AUTH'] and not self._is_logged_in: self.raise_login_required(f'The {client} client requires authentication') return self._download_json( @@ -382,7 +397,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _extract_original_format(self, url, video_id, unlisted_hash=None): # Original/source formats are only available when logged in - if not self._get_cookies('https://vimeo.com/').get('vimeo'): + if not self._is_logged_in: return None policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0] @@ -1111,14 +1126,25 @@ class VimeoIE(VimeoBaseInfoExtractor): video = self._call_videos_api(video_id, unlisted_hash) break except ExtractorError as e: - if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 - and 'password' in traverse_obj( - self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), - ({json.loads}, 'invalid_parameters', ..., 'field'), - )): + if not isinstance(e.cause, HTTPError): + raise + response = traverse_obj( + self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), + ({json.loads}, {dict})) or {} + if ( + not retry and e.cause.status == 400 + and 'password' in traverse_obj(response, ('invalid_parameters', ..., 'field')) + ): self._verify_video_password(video_id) - continue - raise + elif e.cause.status == 404 and response.get('error_code') == 5460: + self.raise_login_required(join_nonempty( + traverse_obj(response, ('error', {str.strip})), + 'Authentication may be needed due to your location.', + 'If your IP address is located in Europe you could try using a VPN/proxy,', + f'or else u{self._login_hint()[1:]}', + delim=' '), method=None) + else: + raise if config_url := traverse_obj(video, ('config_url', {url_or_none})): info = self._parse_config(self._download_json(config_url, video_id), video_id) From 630f3389c33f0f7f6ec97e8917d20aeb4e4078da Mon Sep 17 00:00:00 2001 From: Frank Cai <70647872+averageFOSSenjoyer@users.noreply.github.com> Date: Sun, 13 Jul 2025 16:16:01 -0500 Subject: [PATCH 08/81] [ie/UnitedNationsWebTv] Add extractor (#13538) Closes #2675 Authored by: averageFOSSenjoyer --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/unitednations.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 yt_dlp/extractor/unitednations.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 18a3cac54b..e99edfd40b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2290,6 +2290,7 @@ from .uliza import ( ) from .umg import UMGDeIE from .unistra import UnistraIE +from .unitednations import UnitedNationsWebTvIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, diff --git a/yt_dlp/extractor/unitednations.py b/yt_dlp/extractor/unitednations.py new file mode 100644 index 0000000000..f9283fd6c1 --- /dev/null +++ b/yt_dlp/extractor/unitednations.py @@ -0,0 +1,32 @@ +from .common import InfoExtractor +from .kaltura import KalturaIE + + +class UnitedNationsWebTvIE(InfoExtractor): + _VALID_URL = 
r'https?://webtv\.un\.org/(?:ar|zh|en|fr|ru|es)/asset/\w+/(?P\w+)' + _TESTS = [{ + 'url': 'https://webtv.un.org/en/asset/k1o/k1o7stmi6p', + 'md5': 'b2f8b3030063298ae841b4b7ddc01477', + 'info_dict': { + 'id': '1_o7stmi6p', + 'ext': 'mp4', + 'title': 'António Guterres (Secretary-General) on Israel and Iran - Security Council, 9939th meeting', + 'thumbnail': 'http://cfvod.kaltura.com/p/2503451/sp/250345100/thumbnail/entry_id/1_o7stmi6p/version/100021', + 'uploader_id': 'evgeniia.alisova@un.org', + 'upload_date': '20250620', + 'timestamp': 1750430976, + 'duration': 234, + 'view_count': int, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._html_search_regex( + r'partnerId:\s*(\d+)', webpage, 'partner_id') + entry_id = self._html_search_regex( + r'const\s+kentryID\s*=\s*["\'](\w+)["\']', webpage, 'kentry_id') + + return self.url_result(f'kaltura:{partner_id}:{entry_id}', KalturaIE) From b4b4486effdcb96bb6b8148171a49ff579b69a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Povilas=20Balzaravi=C4=8Dius?= Date: Mon, 14 Jul 2025 00:24:37 +0300 Subject: [PATCH 09/81] [ie/LRTRadio] Fix extractor (#13717) Authored by: Pawka --- yt_dlp/extractor/lrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index caff9125e0..34c9ece2d1 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -134,7 +134,7 @@ class LRTRadioIE(LRTBaseIE): def _real_extract(self, url): video_id, path = self._match_valid_url(url).group('id', 'path') media = self._download_json( - 'https://www.lrt.lt/radioteka/api/media', video_id, + 'https://www.lrt.lt/rest-api/media', video_id, query={'url': f'/mediateka/irasas/{video_id}/{path}'}) return { From 85c3fa1925a9057ef4ae8af682686d5b3eb8e568 Mon Sep 17 00:00:00 2001 From: barsnick Date: Sun, 13 Jul 2025 23:35:10 +0200 Subject: [PATCH 10/81] [ie/RaiSudtirol] Support alternative domain (#13718) Authored by: barsnick --- yt_dlp/extractor/rai.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index c489dc7312..027f7a7b6f 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -765,7 +765,7 @@ class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE class RaiSudtirolIE(RaiBaseIE): - _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P\w+)' + _VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P\w+)' _TESTS = [{ # mp4 file 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', @@ -791,6 +791,9 @@ class RaiSudtirolIE(RaiBaseIE): 'formats': 'count:6', }, 'params': {'skip_download': True}, + }, { + 'url': 'https://raibz.rai.it/de/index.php?media=Ptv1751660400', + 'only_matching': True, }] def _real_extract(self, url): From 6d39c420f7774562a106d90253e2ed5b75036321 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 13 Jul 2025 23:42:45 +0200 Subject: [PATCH 11/81] [ie/JoqrAg] Remove extractor (#13152) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/joqrag.py | 112 -------------------------------- 2 files changed, 113 deletions(-) delete mode 100644 yt_dlp/extractor/joqrag.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e99edfd40b..e173f86883 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -929,7 +929,6 @@ from .jiosaavn import ( JioSaavnSongIE, ) 
from .joj import JojIE -from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py deleted file mode 100644 index 7a91d4a235..0000000000 --- a/yt_dlp/extractor/joqrag.py +++ /dev/null @@ -1,112 +0,0 @@ -import datetime as dt -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - clean_html, - datetime_from_str, - unified_timestamp, - urljoin, -) - - -class JoqrAgIE(InfoExtractor): - IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)' - _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php', - r'https?://(?:www\.)?joqr\.co\.jp/ag/', - r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])'] - _TESTS = [{ - 'url': 'https://www.uniqueradio.jp/agplayer5/player.php', - 'info_dict': { - 'id': 'live', - 'title': str, - 'channel': '超!A&G+', - 'description': str, - 'live_status': 'is_live', - 'release_timestamp': int, - }, - 'params': { - 'skip_download': True, - 'ignore_no_formats_error': True, - }, - }, { - 'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', - 'only_matching': True, - }, { - 'url': 'https://www.joqr.co.jp/ag/article/103760/', - 'only_matching': True, - }, { - 'url': 'http://www.joqr.co.jp/qr/agdailyprogram/', - 'only_matching': True, - }, { - 'url': 'http://www.joqr.co.jp/qr/agregularprogram/', - 'only_matching': True, - }] - - def _extract_metadata(self, variable, html): - return clean_html(urllib.parse.unquote_plus(self._search_regex( - rf'var\s+{variable}\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - html, 'metadata', group='value', default=''))) or None - - def _extract_start_timestamp(self, video_id, is_live): - def extract_start_time_from(date_str): - dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9) - date = dt_.strftime('%Y%m%d') - start_time = self._search_regex( - r']+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})', - self._download_webpage( - f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id, - note=f'Downloading program list of {date}', fatal=False, - errnote=f'Failed to download program list of {date}') or '', - 'start time', default=None) - if start_time: - return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00') - return None - - start_timestamp = extract_start_time_from('today') - if not start_timestamp: - return None - - if not is_live or start_timestamp < datetime_from_str('now').timestamp(): - return start_timestamp - else: - return extract_start_time_from('yesterday') - - def _real_extract(self, url): - video_id = 'live' - - metadata = self._download_webpage( - 'https://www.uniqueradio.jp/aandg', video_id, - note='Downloading metadata', errnote='Failed to download metadata') - title = self._extract_metadata('Program_name', metadata) - - if not title or title == '放送休止': - formats = [] - live_status = 'is_upcoming' - release_timestamp = self._extract_start_timestamp(video_id, False) - msg = 'This stream is not currently live' - if release_timestamp: - msg += (' and will start at ' - + dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S')) - self.raise_no_formats(msg, expected=True) - else: - m3u8_path = self._search_regex( - r']*\bsrc="([^"]+)"', - self._download_webpage( - 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id, - note='Downloading player data', errnote='Failed to download player data'), - 'm3u8 url') - formats = 
self._extract_m3u8_formats( - urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id) - live_status = 'is_live' - release_timestamp = self._extract_start_timestamp(video_id, True) - - return { - 'id': video_id, - 'title': title, - 'channel': '超!A&G+', - 'description': self._extract_metadata('Program_text', metadata), - 'formats': formats, - 'live_status': live_status, - 'release_timestamp': release_timestamp, - } From 23e9389f936ec5236a87815b8576e5ce567b2f77 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 13 Jul 2025 23:43:14 +0200 Subject: [PATCH 12/81] [ie/bandaichannel] Remove extractor (#13152) Closes #8829 Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/bandaichannel.py | 33 ------------------------------- yt_dlp/extractor/unsupported.py | 4 ++++ 3 files changed, 4 insertions(+), 34 deletions(-) delete mode 100644 yt_dlp/extractor/bandaichannel.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e173f86883..c3073ff47a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -201,7 +201,6 @@ from .banbye import ( BanByeChannelIE, BanByeIE, ) -from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py deleted file mode 100644 index d7fcf44bd9..0000000000 --- a/yt_dlp/extractor/bandaichannel.py +++ /dev/null @@ -1,33 +0,0 @@ -from .brightcove import BrightcoveNewBaseIE -from ..utils import extract_attributes - - -class BandaiChannelIE(BrightcoveNewBaseIE): - IE_NAME = 'bandaichannel' - _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' - _TESTS = [{ - 'url': 'https://www.b-ch.com/titles/514/001', - 'md5': 'a0f2d787baa5729bed71108257f613a4', - 'info_dict': { - 'id': '6128044564001', - 'ext': 'mp4', - 'title': 'メタルファイターMIKU 第1話', - 'timestamp': 1580354056, - 'uploader_id': '5797077852001', - 'upload_date': '20200130', - 'duration': 1387.733, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - attrs = extract_attributes(self._search_regex( - r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player')) - bc = self._download_json( - 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'], - video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc'] - return self._parse_brightcove_metadata(bc, bc['id']) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 31393b02a4..1b77198100 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -53,6 +53,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'(?:beta\.)?crunchyroll\.com', r'viki\.com', r'deezer\.com', + r'b-ch\.com', ) _TESTS = [{ @@ -168,6 +169,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'http://www.deezer.com/playlist/176747451', 'only_matching': True, + }, { + 'url': 'https://www.b-ch.com/titles/8203/001', + 'only_matching': True, }] def _real_extract(self, url): From 5d693446e882931618c40c99bb593f0b87b30eb9 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 14 Jul 2025 07:10:59 +0900 Subject: [PATCH 13/81] [ie/limelight] Remove extractors (#13267) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 5 - yt_dlp/extractor/generic.py | 24 --- yt_dlp/extractor/limelight.py | 358 -------------------------------- 
yt_dlp/extractor/tfo.py | 1 + yt_dlp/extractor/tv5unis.py | 2 + 5 files changed, 3 insertions(+), 387 deletions(-) delete mode 100644 yt_dlp/extractor/limelight.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c3073ff47a..1efc313be2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1030,11 +1030,6 @@ from .likee import ( LikeeIE, LikeeUserIE, ) -from .limelight import ( - LimelightChannelIE, - LimelightChannelListIE, - LimelightMediaIE, -) from .linkedin import ( LinkedInEventsIE, LinkedInIE, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 721d04e317..d9a666f991 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1481,30 +1481,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['SenateISVP'], }, - { - # Limelight embeds (1 channel embed + 4 media embeds) - 'url': 'http://www.sedona.com/FacilitatorTraining2017', - 'info_dict': { - 'id': 'FacilitatorTraining2017', - 'title': 'Facilitator Training 2017', - }, - 'playlist_mincount': 5, - }, - { - # Limelight embed (LimelightPlayerUtil.embed) - 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', - 'info_dict': { - 'id': '95d035dc5c8a401588e9c0e6bd1e9c92', - 'ext': 'mp4', - 'title': '07448641', - 'timestamp': 1499890639, - 'upload_date': '20170712', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['LimelightMedia'], - }, { 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', 'info_dict': { diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py deleted file mode 100644 index 763a01448c..0000000000 --- a/yt_dlp/extractor/limelight.py +++ /dev/null @@ -1,358 +0,0 @@ -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - smuggle_url, - try_get, - unsmuggle_url, -) - - -class LimelightBaseIE(InfoExtractor): - _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' - - @classmethod - def _extract_embed_urls(cls, url, webpage): - lm = { - 'Media': 'media', - 'Channel': 'channel', - 'ChannelList': 'channel_list', - } - - def smuggle(url): - return smuggle_url(url, {'source_url': url}) - - entries = [] - for kind, video_id in re.findall( - r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', - webpage): - entries.append(cls.url_result( - smuggle(f'limelight:{lm[kind]}:{video_id}'), - f'Limelight{kind}', video_id)) - for mobj in re.finditer( - # As per [1] class attribute should be exactly equal to - # LimelightEmbeddedPlayerFlash but numerous examples seen - # that don't exactly match it (e.g. [2]). - # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage - # 2. http://www.sedona.com/FacilitatorTraining2017 - r'''(?sx) - ]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*? 
- ]+ - name=(["\'])flashVars\2[^>]+ - value=(["\'])(?:(?!\3).)*(?Pmedia|channel(?:List)?)Id=(?P[a-z0-9]{32}) - ''', webpage): - kind, video_id = mobj.group('kind'), mobj.group('id') - entries.append(cls.url_result( - smuggle(f'limelight:{kind}:{video_id}'), - f'Limelight{kind.capitalize()}', video_id)) - # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page) - for video_id in re.findall( - r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P[a-z0-9]{32})', - webpage): - entries.append(cls.url_result( - smuggle(f'limelight:media:{video_id}'), - LimelightMediaIE.ie_key(), video_id)) - return entries - - def _call_playlist_service(self, item_id, method, fatal=True, referer=None): - headers = {} - if referer: - headers['Referer'] = referer - try: - return self._download_json( - self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, f'Downloading PlaylistService {method} JSON', - fatal=fatal, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 403: - error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission'] - if error == 'CountryDisabled': - self.raise_geo_restricted() - raise ExtractorError(error, expected=True) - raise - - def _extract(self, item_id, pc_method, mobile_method, referer=None): - pc = self._call_playlist_service(item_id, pc_method, referer=referer) - mobile = self._call_playlist_service( - item_id, mobile_method, fatal=False, referer=referer) - return pc, mobile - - def _extract_info(self, pc, mobile, i, referer): - get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {} - pc_item = get_item(pc, 'playlistItems') - mobile_item = get_item(mobile, 'mediaList') - video_id = pc_item.get('mediaId') or mobile_item['mediaId'] - title = pc_item.get('title') or mobile_item['title'] - - formats = [] - urls = [] - for stream in pc_item.get('streams', []): - stream_url = stream.get('url') - if not stream_url or stream_url in urls: - continue - if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'): - continue - urls.append(stream_url) - ext = determine_ext(stream_url) - if ext == 'f4m': - formats.extend(self._extract_f4m_formats( - stream_url, video_id, f4m_id='hds', fatal=False)) - else: - fmt = { - 'url': stream_url, - 'abr': float_or_none(stream.get('audioBitRate')), - 'fps': float_or_none(stream.get('videoFrameRate')), - 'ext': ext, - } - width = int_or_none(stream.get('videoWidthInPixels')) - height = int_or_none(stream.get('videoHeightInPixels')) - vbr = float_or_none(stream.get('videoBitRate')) - if width or height or vbr: - fmt.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - }) - else: - fmt['vcodec'] = 'none' - rtmp = re.search(r'^(?Prtmpe?://(?P[^/]+)/(?P.+))/(?Pmp[34]:.+)$', stream_url) - if rtmp: - format_id = 'rtmp' - if stream.get('videoBitRate'): - format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_format_id = format_id.replace('rtmp', 'http') - - CDN_HOSTS = ( - ('delvenetworks.com', 'cpl.delvenetworks.com'), - ('video.llnw.net', 's2.content.video.llnw.net'), - ) - for cdn_host, http_host in CDN_HOSTS: - if cdn_host not in rtmp.group('host').lower(): - continue - http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:]) - urls.append(http_url) - if self._is_valid_url(http_url, video_id, http_format_id): - http_fmt = fmt.copy() - http_fmt.update({ - 
'url': http_url, - 'format_id': http_format_id, - }) - formats.append(http_fmt) - break - - fmt.update({ - 'url': rtmp.group('url'), - 'play_path': rtmp.group('playpath'), - 'app': rtmp.group('app'), - 'ext': 'flv', - 'format_id': format_id, - }) - formats.append(fmt) - - for mobile_url in mobile_item.get('mobileUrls', []): - media_url = mobile_url.get('mobileUrl') - format_id = mobile_url.get('targetMediaPlatform') - if not media_url or media_url in urls: - continue - if (format_id in ('Widevine', 'SmoothStreaming') - and not self.get_param('allow_unplayable_formats', False)): - continue - urls.append(media_url) - ext = determine_ext(media_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - stream_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': media_url, - 'format_id': format_id, - 'quality': -10, - 'ext': ext, - }) - - subtitles = {} - for flag in mobile_item.get('flags'): - if flag == 'ClosedCaptions': - closed_captions = self._call_playlist_service( - video_id, 'getClosedCaptionsDetailsByMediaId', - False, referer) or [] - for cc in closed_captions: - cc_url = cc.get('webvttFileUrl') - if not cc_url: - continue - lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': cc_url, - }) - break - - get_meta = lambda x: pc_item.get(x) or mobile_item.get(x) - - return { - 'id': video_id, - 'title': title, - 'description': get_meta('description'), - 'formats': formats, - 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000), - 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'), - 'subtitles': subtitles, - } - - -class LimelightMediaIE(LimelightBaseIE): - IE_NAME = 'limelight' - _VALID_URL = r'''(?x) - (?: - limelight:media:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bmediaId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', - 'info_dict': { - 'id': '3ffd040b522b4485b6d84effc750cd86', - 'ext': 'mp4', - 'title': 'HaP and the HB Prince Trailer', - 'description': 'md5:8005b944181778e313d95c1237ddb640', - 'thumbnail': r're:^https?://.*\.jpeg$', - 'duration': 144.23, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # video with subtitles - 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', - 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d', - 'info_dict': { - 'id': 'a3e00274d4564ec4a9b29b9466432335', - 'ext': 'mp4', - 'title': '3Play Media Overview Video', - 'thumbnail': r're:^https?://.*\.jpeg$', - 'duration': 78.101, - # TODO: extract all languages that were accessible via API - # 'subtitles': 'mincount:9', - 'subtitles': 'mincount:1', - }, - }, { - 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'media' - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - video_id = self._match_id(url) - source_url = smuggled_data.get('source_url') - self._initialize_geo_bypass({ - 'countries': smuggled_data.get('geo_countries'), - }) - - pc, mobile = self._extract( - video_id, 'getPlaylistByMediaId', - 'getMobilePlaylistByMediaId', source_url) - 
- return self._extract_info(pc, mobile, 0, source_url) - - -class LimelightChannelIE(LimelightBaseIE): - IE_NAME = 'limelight:channel' - _VALID_URL = r'''(?x) - (?: - limelight:channel:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bchannelId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', - 'info_dict': { - 'id': 'ab6a524c379342f9b23642917020c082', - 'title': 'Javascript Sample Code', - 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html', - }, - 'playlist_mincount': 3, - }, { - 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'channel' - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - channel_id = self._match_id(url) - source_url = smuggled_data.get('source_url') - - pc, mobile = self._extract( - channel_id, 'getPlaylistByChannelId', - 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', - source_url) - - entries = [ - self._extract_info(pc, mobile, i, source_url) - for i in range(len(pc['playlistItems']))] - - return self.playlist_result( - entries, channel_id, pc.get('title'), mobile.get('description')) - - -class LimelightChannelListIE(LimelightBaseIE): - IE_NAME = 'limelight:channel_list' - _VALID_URL = r'''(?x) - (?: - limelight:channel_list:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bchannelListId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', - 'info_dict': { - 'id': '301b117890c4465c8179ede21fd92e2b', - 'title': 'Website - Hero Player', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'channel_list' - - def _real_extract(self, url): - channel_list_id = self._match_id(url) - - channel_list = self._call_playlist_service( - channel_list_id, 'getMobileChannelListById') - - entries = [ - self.url_result('limelight:channel:{}'.format(channel['id']), 'LimelightChannel') - for channel in channel_list['channelList']] - - return self.playlist_result( - entries, channel_list_id, channel_list['title']) diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py index 0d1b252175..1884ab2e8e 100644 --- a/yt_dlp/extractor/tfo.py +++ b/yt_dlp/extractor/tfo.py @@ -6,6 +6,7 @@ from ..utils import ExtractorError, clean_html, int_or_none class TFOIE(InfoExtractor): + _WORKING = False _GEO_COUNTRIES = ['CA'] _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' _TEST = { diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py index 88fd334822..fe7fd0325b 100644 --- a/yt_dlp/extractor/tv5unis.py +++ b/yt_dlp/extractor/tv5unis.py @@ -51,6 +51,7 @@ class TV5UnisBaseIE(InfoExtractor): class TV5UnisVideoIE(TV5UnisBaseIE): + _WORKING = False IE_NAME = 'tv5unis:video' _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P\d+)' _TEST = { @@ -71,6 +72,7 @@ class TV5UnisVideoIE(TV5UnisBaseIE): class TV5UnisIE(TV5UnisBaseIE): + _WORKING = False IE_NAME = 'tv5unis' _VALID_URL = 
r'https?://(?:www\.)?tv5unis\.ca/videos/(?P[^/]+)(?:/saisons/(?P\d+)/episodes/(?P\d+))?/?(?:[?#&]|$)' _TESTS = [{ From 07d1d85f6387e4bdb107096f0131c7054f078bb9 Mon Sep 17 00:00:00 2001 From: chauhantirth <92777505+chauhantirth@users.noreply.github.com> Date: Mon, 14 Jul 2025 04:05:26 +0530 Subject: [PATCH 14/81] [ie/hotstar] Fix support for free accounts (#13700) Fixes b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Closes #13600 Authored by: chauhantirth --- yt_dlp/extractor/hotstar.py | 57 ++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 891bcc8731..f10aab27a3 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -12,8 +12,11 @@ from ..utils import ( ExtractorError, OnDemandPagedList, determine_ext, + filter_dict, int_or_none, join_nonempty, + jwt_decode_hs256, + parse_iso8601, str_or_none, url_or_none, ) @@ -21,35 +24,48 @@ from ..utils.traversal import require, traverse_obj class HotStarBaseIE(InfoExtractor): + _TOKEN_NAME = 'userUP' _BASE_URL = 'https://www.hotstar.com' _API_URL = 'https://api.hotstar.com' _API_URL_V2 = 'https://apix.hotstar.com/v2' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' + _FREE_HEADERS = { + 'user-agent': 'Hotstar;in.startv.hotstar/25.06.30.0.11580 (Android/12)', + 'x-hs-client': 'platform:android;app_id:in.startv.hotstar;app_version:25.06.30.0;os:Android;os_version:12;schema_version:0.0.1523', + 'x-hs-platform': 'android', + } + _SUB_HEADERS = { + 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', + 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', + 'x-hs-platform': 'androidtv', + } + + def _has_active_subscription(self, cookies, server_time): + expiry = traverse_obj(cookies, ( + self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads}, + 'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0 + return expiry > server_time + def _call_api_v1(self, path, *args, **kwargs): return self._download_json( f'{self._API_URL}/o/v1/{path}', *args, **kwargs, headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) - def _call_api_impl(self, path, video_id, query, st=None, cookies=None): - if not cookies or not cookies.get('userUP'): - self.raise_login_required() - + def _call_api_impl(self, path, video_id, query, cookies=None, st=None): st = int_or_none(st) or int(time.time()) exp = st + 6000 auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( f'{self._API_URL_V2}/{path}', video_id, query=query, - headers={ - 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', + headers=filter_dict({ + **(self._SUB_HEADERS if self._has_active_subscription(cookies, st) else self._FREE_HEADERS), 'hotstarauth': auth, - 'x-hs-usertoken': cookies['userUP'].value, + 'x-hs-usertoken': traverse_obj(cookies, (self._TOKEN_NAME, 'value')), 'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()), - 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', - 'x-hs-platform': 'androidtv', 'content-type': 'application/json', - }) + })) if not traverse_obj(response, ('success', {dict})): raise ExtractorError('API call was 
unsuccessful') @@ -61,21 +77,22 @@ class HotStarBaseIE(InfoExtractor): 'filters': f'content_type={content_type}', 'client_capabilities': json.dumps({ 'package': ['dash', 'hls'], - 'container': ['fmp4br', 'fmp4'], + 'container': ['fmp4', 'fmp4br', 'ts'], 'ads': ['non_ssai', 'ssai'], - 'audio_channel': ['atmos', 'dolby51', 'stereo'], + 'audio_channel': ['stereo', 'dolby51', 'atmos'], 'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error - 'video_codec': ['h265', 'h264'], - 'ladder': ['tv', 'full'], - 'resolution': ['4k', 'hd'], - 'true_resolution': ['4k', 'hd'], - 'dynamic_range': ['hdr', 'sdr'], + 'video_codec': ['h264', 'h265'], + 'video_codec_non_secure': ['h264', 'h265', 'vp9'], + 'ladder': ['phone', 'tv', 'full'], + 'resolution': ['hd', '4k'], + 'true_resolution': ['hd', '4k'], + 'dynamic_range': ['sdr', 'hdr'], }, separators=(',', ':')), 'drm_parameters': json.dumps({ 'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'], 'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'], }, separators=(',', ':')), - }, st=st, cookies=cookies) + }, cookies=cookies, st=st) @staticmethod def _parse_metadata_v1(video_data): @@ -274,6 +291,8 @@ class HotStarIE(HotStarBaseIE): video_id, video_type = self._match_valid_url(url).group('id', 'type') video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request + if not cookies or not cookies.get(self._TOKEN_NAME): + self.raise_login_required() video_data = traverse_obj( self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={ @@ -292,7 +311,7 @@ class HotStarIE(HotStarBaseIE): # See https://github.com/yt-dlp/yt-dlp/issues/396 st = self._request_webpage( f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date') - watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st) + watch = self._call_api_v2('pages/watch', video_id, content_type, cookies, st) player_config = traverse_obj(watch, ( 'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget', 'widget', 'data', 'player_config', {dict}, any, {require('player config')})) From 9f54ea38984788811773ca2ceaca73864acf0e8a Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:14:19 +0900 Subject: [PATCH 15/81] [ie/ctv] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/ctv.py | 49 --------------------------------- yt_dlp/extractor/unsupported.py | 4 +++ 3 files changed, 4 insertions(+), 50 deletions(-) delete mode 100644 yt_dlp/extractor/ctv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1efc313be2..0d1dc2b045 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -446,7 +446,6 @@ from .cspan import ( CSpanIE, ) from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py deleted file mode 100644 index a41dab11b1..0000000000 --- a/yt_dlp/extractor/ctv.py +++ /dev/null @@ -1,49 +0,0 @@ -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 
'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, # noqa: UP031 - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 1b77198100..bd90bc533a 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -54,6 +54,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'viki\.com', r'deezer\.com', r'b-ch\.com', + r'ctv\.ca', ) _TESTS = [{ @@ -172,6 +173,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.b-ch.com/titles/8203/001', 'only_matching': True, + }, { + 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', + 'only_matching': True, }] def _real_extract(self, url): From 6fb3947c0dc6d0e3eab5077c5bada8402f47a277 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 9 Jun 2025 20:41:14 +0900 Subject: [PATCH 16/81] [ie/bellmedia] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/bellmedia.py | 91 --------------------------------- yt_dlp/extractor/unsupported.py | 4 ++ 3 files changed, 4 insertions(+), 92 deletions(-) delete mode 100644 yt_dlp/extractor/bellmedia.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0d1dc2b045..b4ca2175cf 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -228,7 +228,6 @@ from .beatbump import ( from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py deleted file mode 100644 index ac45dd4779..0000000000 --- a/yt_dlp/extractor/bellmedia.py +++ /dev/null @@ -1,91 +0,0 @@ -from .common import InfoExtractor - - -class BellMediaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? 
- (?P - (?: - ctv| - tsn| - bnn(?:bloomberg)?| - thecomedynetwork| - discovery| - discoveryvelocity| - sciencechannel| - investigationdiscovery| - animalplanet| - bravo| - mtv| - space| - etalk| - marilyn - )\.ca| - (?:much|cp24)\.com - )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' - _TESTS = [{ - 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', - 'md5': '3e5b8e38370741d5089da79161646635', - 'info_dict': { - 'id': '1403070', - 'ext': 'flv', - 'title': 'David Cockfield\'s Top Picks', - 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', - 'upload_date': '20180525', - 'timestamp': 1527288600, - 'season_id': '73997', - 'season': '2018', - 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', - 'tags': [], - 'categories': ['ETFs'], - 'season_number': 8, - 'duration': 272.038, - 'series': 'Market Call Tonight', - }, - }, { - 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', - 'only_matching': True, - }, { - 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', - 'only_matching': True, - }, { - 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', - 'only_matching': True, - }, { - 'url': 'http://www.etalk.ca/video?videoid=663455', - 'only_matching': True, - }, { - 'url': 'https://www.cp24.com/video?clipId=1982548', - 'only_matching': True, - }] - _DOMAINS = { - 'thecomedynetwork': 'comedy', - 'discoveryvelocity': 'discvel', - 'sciencechannel': 'discsci', - 'investigationdiscovery': 'invdisc', - 'animalplanet': 'aniplan', - 'etalk': 'ctv', - 'bnnbloomberg': 'bnn', - 'marilyn': 'ctv_marilyn', - } - - def _real_extract(self, url): - domain, video_id = self._match_valid_url(url).groups() - domain = domain.split('.')[0] - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', - 'ie_key': 'NineCNineMedia', - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index bd90bc533a..628e406191 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -55,6 +55,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'deezer\.com', r'b-ch\.com', r'ctv\.ca', + r'tsn\.ca', ) _TESTS = [{ @@ -176,6 +177,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', 'only_matching': True, + }, { + 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747', + 'only_matching': True, }] def _real_extract(self, url): From d57a0b5aa78d59324b037d37492fe86aa4fbf58a Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Wed, 11 Jun 2025 05:16:17 +0900 Subject: [PATCH 17/81] [ie/noovo] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/noovo.py | 100 -------------------------------- yt_dlp/extractor/unsupported.py | 4 ++ 3 files 
changed, 4 insertions(+), 101 deletions(-) delete mode 100644 yt_dlp/extractor/noovo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b4ca2175cf..0a00db437e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1374,7 +1374,6 @@ from .nobelprize import NobelPrizeIE from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py deleted file mode 100644 index 772d4ed9e0..0000000000 --- a/yt_dlp/extractor/noovo.py +++ /dev/null @@ -1,100 +0,0 @@ -from .brightcove import BrightcoveNewIE -from .common import InfoExtractor -from ..utils import ( - int_or_none, - js_to_json, - smuggle_url, - try_get, -) - - -class NoovoIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P[^/]+/[^/?#&]+)' - _TESTS = [{ - # clip - 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial', - 'info_dict': { - 'id': '5386045029001', - 'ext': 'mp4', - 'title': 'Chrysler Imperial', - 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056', - 'timestamp': 1491399228, - 'upload_date': '20170405', - 'uploader_id': '618566855001', - 'series': 'RPM+', - }, - 'params': { - 'skip_download': True, - }, - }, { - # episode - 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8', - 'info_dict': { - 'id': '5395865725001', - 'title': 'Épisode 13 : Les retrouvailles', - 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473', - 'ext': 'mp4', - 'timestamp': 1492019320, - 'upload_date': '20170412', - 'uploader_id': '618566855001', - 'series': "L'amour est dans le pré", - 'season_number': 5, - 'episode': 'Épisode 13', - 'episode_number': 13, - }, - 'params': { - 'skip_download': True, - }, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - brightcove_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'brightcove id') - - data = self._parse_json( - self._search_regex( - r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data', - default='{}'), - video_id, transform_source=js_to_json, fatal=False) - - title = try_get( - data, lambda x: x['video']['nom'], - str) or self._html_search_meta( - 'dcterms.Title', webpage, 'title', fatal=True) - - description = self._html_search_meta( - ('dcterms.Description', 'description'), webpage, 'description') - - series = try_get( - data, lambda x: x['emission']['nom']) or self._search_regex( - r']+class="banner-card__subtitle h4"[^>]*>([^<]+)', - webpage, 'series', default=None) - - season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {} - season = try_get(season_el, lambda x: x['nom'], str) - season_number = int_or_none(try_get(season_el, lambda x: x['numero'])) - - episode_el = try_get(season_el, lambda x: x['episode'], dict) or {} - episode = try_get(episode_el, lambda x: x['nom'], str) - episode_number = int_or_none(try_get(episode_el, lambda x: x['numero'])) - - return { - '_type': 'url_transparent', - 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'id': brightcove_id, - 'title': title, - 'description': description, - 'series': series, - 'season': season, - 'season_number': 
season_number, - 'episode': episode, - 'episode_number': episode_number, - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 628e406191..05ae4dd18a 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -55,6 +55,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'deezer\.com', r'b-ch\.com', r'ctv\.ca', + r'noovo\.ca', r'tsn\.ca', ) @@ -177,6 +178,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', 'only_matching': True, + }, { + 'url': 'https://www.noovo.ca/emissions/lamour-est-dans-le-pre/prets-pour-lamour-s10e1', + 'only_matching': True, }, { 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747', 'only_matching': True, From 7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:24:52 -0500 Subject: [PATCH 18/81] [ie/hotstar] Improve error handling (#13727) Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index f10aab27a3..b280fb53ab 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -383,10 +383,13 @@ class HotStarIE(HotStarBaseIE): formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) - if not formats and geo_restricted: - self.raise_geo_restricted(countries=['IN'], metadata_available=True) - elif not formats and has_drm: - self.report_drm(video_id) + if not formats: + if geo_restricted: + self.raise_geo_restricted(countries=['IN'], metadata_available=True) + elif has_drm: + self.report_drm(video_id) + elif not self._has_active_subscription(cookies, st): + self.raise_no_formats('Your account does not have access to this content', expected=True) self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) From ade876efb31d55d3394185ffc56942fdc8d325cc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:25:45 -0500 Subject: [PATCH 19/81] [ie/francetv] Improve error handling (#13726) Closes #13324 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index edf6708a03..54c2c53aca 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -124,9 +124,10 @@ class FranceTVIE(InfoExtractor): elif code := traverse_obj(dinfo, ('code', {int})): if code == 2009: self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - elif code in (2015, 2017): + elif code in (2015, 2017, 2019): # 2015: L'accès à cette vidéo est impossible. (DRM-only) # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) + # 2019: L'accès à cette vidéo est incompatible avec votre configuration. 
(DRM-only) drm_formats = True continue self.report_warning( From d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf Mon Sep 17 00:00:00 2001 From: rdamas Date: Mon, 14 Jul 2025 20:55:52 +0200 Subject: [PATCH 20/81] [ie/archive.org] Fix extractor (#13706) Closes #13704 Authored by: rdamas --- yt_dlp/extractor/archiveorg.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2849d9fd5b..572bd6bfe2 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -16,6 +16,7 @@ from ..utils import ( dict_get, extract_attributes, get_element_by_id, + get_element_text_and_html_by_tag, int_or_none, join_nonempty, js_to_json, @@ -72,6 +73,7 @@ class ArchiveOrgIE(InfoExtractor): 'display_id': 'Cops-v2.mp4', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 1091.96, + 'track': 'Cops-v2', }, }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', @@ -86,6 +88,7 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 59.77, 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg', + 'track': 'Commercial-JFK1960ElectionAdCampaignJingle', }, }, { 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg', @@ -102,6 +105,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 59.51, 'license': 'http://creativecommons.org/licenses/publicdomain/', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', + 'track': 'Commercial-Nixon1960ElectionAdToughonDefense', }, }, { 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16', @@ -182,6 +186,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 130.46, 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg', 'display_id': 'irelandthemakingofarepublicreel1_01.mov', + 'track': 'irelandthemakingofarepublicreel1 01', }, }, { 'md5': '67335ee3b23a0da930841981c1e79b02', @@ -192,6 +197,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel1_02.mov', 'display_id': 'irelandthemakingofarepublicreel1_02.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg', + 'track': 'irelandthemakingofarepublicreel1 02', }, }, { 'md5': 'e470e86787893603f4a341a16c281eb5', @@ -202,6 +208,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel2.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', + 'track': 'irelandthemakingofarepublicreel2', }, }, ], @@ -229,15 +236,8 @@ class ArchiveOrgIE(InfoExtractor): @staticmethod def _playlist_data(webpage): - element = re.findall(r'''(?xs) - - ''', webpage)[0] - - return json.loads(extract_attributes(element)['value']) + element = get_element_text_and_html_by_tag('play-av', webpage)[1] + return json.loads(extract_attributes(element)['playlist']) def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) From 3a84be9d1660ef798ea28f929a20391bef6afda4 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:01:53 +0300 Subject: [PATCH 21/81] [ie/TheHighWire] Add extractor 
(#13505) Closes #13364 Authored by: swayll --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/thehighwire.py | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 yt_dlp/extractor/thehighwire.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0a00db437e..c9172fef78 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2092,6 +2092,7 @@ from .theguardian import ( TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( diff --git a/yt_dlp/extractor/thehighwire.py b/yt_dlp/extractor/thehighwire.py new file mode 100644 index 0000000000..8b596143f7 --- /dev/null +++ b/yt_dlp/extractor/thehighwire.py @@ -0,0 +1,43 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + url_or_none, +) +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) + + +class TheHighWireIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thehighwire\.com/ark-videos/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/', + 'info_dict': { + 'id': 'the-deposition-of-stanley-plotkin', + 'ext': 'mp4', + 'title': 'THE DEPOSITION OF STANLEY PLOTKIN', + 'description': 'md5:6d0be4f1181daaa10430fd8b945a5e54', + 'thumbnail': r're:https?://static\.arkengine\.com/video/.+\.jpg', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + embed_url = traverse_obj(webpage, ( + {find_element(cls='ark-video-embed', html=True)}, + {extract_attributes}, 'src', {url_or_none}, {require('embed URL')})) + embed_page = self._download_webpage(embed_url, display_id) + + return { + 'id': display_id, + **traverse_obj(webpage, { + 'title': ({find_element(cls='section-header')}, {clean_html}), + 'description': ({find_element(cls='episode-description__copy')}, {clean_html}), + }), + **self._parse_html5_media_entries(embed_url, embed_page, display_id, m3u8_id='hls')[0], + } From dcc4cba39e2a79d3efce16afa28dbe245468489f Mon Sep 17 00:00:00 2001 From: flanter21 <139064898+flanter21@users.noreply.github.com> Date: Thu, 17 Jul 2025 02:17:48 +0300 Subject: [PATCH 22/81] [ie/blackboardcollaborate] Support subtitles and authwalled videos (#12473) Authored by: flanter21 --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/blackboardcollaborate.py | 146 +++++++++++++++++++--- 2 files changed, 135 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c9172fef78..4d67e1caa3 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -273,7 +273,10 @@ from .bitchute import ( BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 535890979b..c14ff1f142 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,16 +1,27 @@ from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + UnsupportedError, + float_or_none, + int_or_none, + 
join_nonempty, + jwt_decode_hs256, + mimetype2ext, + parse_iso8601, + parse_qs, + url_or_none, +) +from ..utils.traversal import traverse_obj class BlackboardCollaborateIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?P<region>[a-z-]+)\.bbcollab\.com/ + (?P<region>[a-z]+)(?:-lti)?\.bbcollab\.com/ (?: collab/ui/session/playback/load| recording )/ - (?P<id>[^/]+)''' + (?P<id>[^/?#]+)''' _TESTS = [ { 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', @@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor): 'id': '0a633b6a88824deb8c918f470b22b256', 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', 'ext': 'mp4', - 'duration': 1896000, - 'timestamp': 1620331399, + 'duration': 1896, + 'timestamp': 1620333295, 'upload_date': '20210506', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600', + 'md5': '108db6a8f83dcb0c2a07793649581865', + 'info_dict': { + 'id': '4bde2dee104f40289a10f8e554270600', + 'title': 'Meeting - Azerbaycanca erize formasi', + 'ext': 'mp4', + 'duration': 880, + 'timestamp': 1671176868, + 'upload_date': '20221216', + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6', + 'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5', + 'info_dict': { + 'id': 'f83be390ecff46c0bf7dccb9dddcf5f6', + 'title': 'Keynote lecture by Laura Carvalho - recording_1', + 'ext': 'mp4', + 'duration': 5506, + 'timestamp': 1662721705, + 'upload_date': '20220909', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496', + 'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f', + 'info_dict': { + 'id': 'c3e1e7c9e83d4cd9981c93c74888d496', + 'title': 'International Ally User Group - recording_18', + 'ext': 'mp4', + 'duration': 3479, + 'timestamp': 1721919621, + 'upload_date': '20240725', + 'subtitles': { + 'en': 'mincount:1', + 'live_chat': 'mincount:1', + }, }, }, { @@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor): }, ] + def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False): + # Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST + return self._download_json( + join_nonempty(f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/'), + video_id, note or 'Downloading JSON metadata', fatal=fatal, + headers={'Authorization': f'Bearer {token}'} if token else None) + def _real_extract(self, url): mobj = self._match_valid_url(url) region = mobj.group('region') video_id = mobj.group('id') - info = self._download_json( - f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) - duration = info.get('duration') - title = info['name'] - upload_date = info.get('created') - streams = info['streams'] - formats = [{'format_id': k, 'url': url} for k, url in streams.items()] + token = parse_qs(url).get('authToken', [None])[-1] + + video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token') + if video_info: + video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes') + else: + video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True) + video_extra = {} + + formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), { + 'url': 'streamUrl', + 'ext': ('contentType', {mimetype2ext}), + 
'aspect_ratio': ('aspectRatio', {float_or_none}), + })) + + if filesize := traverse_obj(video_extra, ('storageSize', {int_or_none})): + for fmt in formats: + fmt['filesize'] = filesize + + subtitles = {} + for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))): + subtitles.setdefault(subs.get('lang') or 'und', []).append({ + 'name': traverse_obj(subs, ('label', {str})), + 'url': subs['url'], + }) + + for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})): + subtitles.setdefault('live_chat', []).append({'url': live_chat_url}) return { - 'duration': duration, + **traverse_obj(video_info, { + 'title': ('name', {str}), + 'timestamp': ('created', {parse_iso8601}), + 'duration': ('duration', {int_or_none(scale=1000)}), + }), 'formats': formats, 'id': video_id, - 'timestamp': parse_iso8601(upload_date), - 'title': title, + 'subtitles': subtitles, } + + +class BlackboardCollaborateLaunchIE(InfoExtractor): + _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P[^/?#]+)' + + _TESTS = [ + { + 'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk', + 'only_matching': True, + }, + { + 'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY', + 'only_matching': True, + }, + { + 'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + token = self._match_id(url) + video_id = jwt_decode_hs256(token)['resourceAccessTicket']['resourceId'] + + redirect_url = self._request_webpage(url, video_id).url + if self.suitable(redirect_url): + raise UnsupportedError(redirect_url) + return self.url_result(redirect_url, BlackboardCollaborateIE, video_id) From c1ac543c8166ff031d62e340b3244ca8556e3fb9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 16 Jul 2025 
18:19:58 -0500 Subject: [PATCH 23/81] [ie/soundcloud] Always extract original format extension (#13746) Closes #13743 Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 3496a08ef6..404e298978 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -242,7 +242,7 @@ class SoundcloudBaseIE(InfoExtractor): format_urls.add(format_url) formats.append({ 'format_id': 'download', - 'ext': urlhandle_detect_ext(urlh, default='mp3'), + 'ext': urlhandle_detect_ext(urlh), 'filesize': int_or_none(urlh.headers.get('Content-Length')), 'url': format_url, 'quality': 10, From b8abd255e454acbe0023cdb946f9eb461ced7eeb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 14:43:40 -0500 Subject: [PATCH 24/81] [utils] `mimetype2ext`: Always parse `flac` from `audio/flac` (#13748) Authored by: bashonly --- yt_dlp/utils/_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 20aa341ca3..c930830d99 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2961,6 +2961,7 @@ def mimetype2ext(mt, default=NO_DEFAULT): 'audio/x-matroska': 'mka', 'audio/x-mpegurl': 'm3u', 'aacp': 'aac', + 'flac': 'flac', 'midi': 'mid', 'ogg': 'ogg', 'wav': 'wav', From 28bf46b7dafe2e241137763bf570a2f91ba8a53a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 14:46:06 -0500 Subject: [PATCH 25/81] [utils] `urlhandle_detect_ext`: Use `x-amz-meta-file-type` headers (#13749) Authored by: bashonly --- yt_dlp/utils/_utils.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c930830d99..c91a06e9a6 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -3106,21 +3106,15 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): def urlhandle_detect_ext(url_handle, default=NO_DEFAULT): getheader = url_handle.headers.get - cd = getheader('Content-Disposition') - if cd: - m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd) - if m: - e = determine_ext(m.group('filename'), default_ext=None) - if e: - return e - - meta_ext = getheader('x-amz-meta-name') - if meta_ext: - e = meta_ext.rpartition('.')[2] - if e: - return e + if cd := getheader('Content-Disposition'): + if m := re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd): + if ext := determine_ext(m.group('filename'), default_ext=None): + return ext - return mimetype2ext(getheader('Content-Type'), default=default) + return ( + determine_ext(getheader('x-amz-meta-name'), default_ext=None) + or getheader('x-amz-meta-file-type') + or mimetype2ext(getheader('Content-Type'), default=default)) def encode_data_uri(data, mime_type): From 5f951ce929b56a822514f1a02cc06af030855ec7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 15:06:02 -0500 Subject: [PATCH 26/81] [ie/aenetworks] Support new URL formats (#13747) Closes #13745 Authored by: bashonly --- yt_dlp/extractor/aenetworks.py | 70 ++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index e5c922b41f..a4a5f409ec 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -111,11 +111,9 @@ 
class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id> - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?P<type>movie|special)s/[^/?#]+(?P<extra>/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], @@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Mountain Men', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', 'episode': 'Inlawful Entry', @@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Duck Dynasty', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', + }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', @@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) From 
4919051e447c7f8ae9df8ba5c4208b6b5c04915a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:55:02 -0500 Subject: [PATCH 27/81] [core] Don't let format testing alter the return code (#13767) Closes #13750 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 44a6696c02..3cfcb8ef0f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2208,6 +2208,9 @@ class YoutubeDL: continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2218,6 +2221,8 @@ class YoutubeDL: os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: f.pop('__needs_testing', None) From 1f27a9f8baccb9105f2476154557540efe09a937 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:59:50 -0500 Subject: [PATCH 28/81] [core] Warn when skipping formats (#13090) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3cfcb8ef0f..9c9ee64a8c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2195,7 +2195,7 @@ class YoutubeDL: return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2228,7 +2228,11 @@ class YoutubeDL: f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' 
+ if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2954,7 +2958,7 @@ class YoutubeDL: ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them From c8329fc572903eeed7edad1642773b2268b71a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Schmidt?= <121871105+moonshinerd@users.noreply.github.com> Date: Fri, 18 Jul 2025 19:43:04 -0300 Subject: [PATCH 29/81] [ie/rai] Fix formats extraction (#13572) Closes #13548 Authored by: moonshinerd, seproDev Co-authored-by: sepro --- yt_dlp/extractor/rai.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 027f7a7b6f..d1a4d4c37f 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -81,7 +81,7 @@ class RaiBaseIE(InfoExtractor): # geo flag is a bit unreliable and not properly set all the time geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' - ext = determine_ext(media_url) + ext = determine_ext(media_url).lower() formats = [] if ext == 'mp3': @@ -108,7 +108,7 @@ class RaiBaseIE(InfoExtractor): 'format_id': join_nonempty('https', bitrate, delim='-'), }) else: - raise ExtractorError('Unrecognized media file found') + raise ExtractorError(f'Unrecognized media extension "{ext}"') if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) @@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE): 'upload_date': '20211201', }, 'params': {'skip_download': True}, + }, { + # case-sensitivity test for uppercase extension + 'url': 'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html', + 'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8', + 'info_dict': { + 'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c', + 'ext': 'mp3', + 'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania", + 'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705', + 'description': 'md5:92818b6f31b2c150567d56b75db2ea7f', + 'uploader': 'rai radio 3', + 'duration': 2439.0, + 'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg', + 'creators': ['rai radio 3'], + 'series': 'Maturadio', + 'season': 'Season 9', + 'season_number': 9, + 'episode': "01. 
L'unità d'Italia e l'unificazione della Germania", + 'episode_number': 1, + 'timestamp': 1590400740, + 'upload_date': '20200525', + }, }] def _real_extract(self, url): From 09982bc33e2f1f9a1ff66e6738df44f15b36f6a6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 18:24:52 -0500 Subject: [PATCH 30/81] [ie/dangalplay] Support other login regions (#13768) Authored by: bashonly --- yt_dlp/extractor/dangalplay.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index f7b243234a..3b0dc1f607 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ from ..utils.traversal import traverse_obj class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). ' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ class DangalPlayBaseIE(InfoExtractor): f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ class DangalPlayIE(DangalPlayBaseIE): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ class DangalPlayIE(DangalPlayBaseIE): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. 
{self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( From 1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3 Mon Sep 17 00:00:00 2001 From: R0hanW <30849420+R0hanW@users.noreply.github.com> Date: Fri, 18 Jul 2025 19:38:52 -0400 Subject: [PATCH 31/81] [ie/PlayerFm] Add extractor (#13016) Closes #4518 Authored by: R0hanW --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/playerfm.py | 70 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 yt_dlp/extractor/playerfm.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4d67e1caa3..59a61e0604 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1557,6 +1557,7 @@ from .platzi import ( PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE diff --git a/yt_dlp/extractor/playerfm.py b/yt_dlp/extractor/playerfm.py new file mode 100644 index 0000000000..d59d651a32 --- /dev/null +++ b/yt_dlp/extractor/playerfm.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none +from ..utils.traversal import traverse_obj + + +class PlayerFmIE(InfoExtractor): + _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))' + _TESTS = [{ + 'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix', + 'info_dict': { + 'ext': 'mp3', + 'id': '478606546', + 'display_id': 'movie-mindset-33-casino-feat-felix', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': 'Movie Mindset 33 - Casino feat. Felix', + 'creators': ['Chapo Trap House'], + 'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.', + 'duration': 6830, + 'timestamp': 1745406000, + 'upload_date': '20250423', + }, + }, { + 'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025', + 'info_dict': { + 'ext': 'mp3', + 'id': '477635490', + 'display_id': 'thursday-april-17-2025', + 'title': 'Thursday, April 17, 2025', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'duration': 1143, + 'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82', + 'creators': ['NBC News'], + 'timestamp': 1744941374, + 'upload_date': '20250418', + }, + }, { + 'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf', + 'info_dict': { + 'ext': 'mp3', + 'id': '481418710', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': r're:#109 It\'s kicking off! 
How have the rules for kickoff changed, .+ the present system\?', + 'creators': ['TSS'], + 'duration': 1510, + 'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13', + 'description': 'md5:52a39e36d08d8919527454f152ad3c25', + 'timestamp': 1659102055, + 'upload_date': '20220729', + }, + }] + + def _real_extract(self, url): + display_id, url = self._match_valid_url(url).group('id', 'url') + data = self._download_json(f'{url}.json', display_id) + + return { + 'display_id': display_id, + 'vcodec': 'none', + **traverse_obj(data, { + 'id': ('id', {int}, {str_or_none}), + 'url': ('url', {clean_podcast_url}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any), + 'filesize': ('size', {int_or_none}), + 'timestamp': ('publishedAt', {int_or_none}), + 'creators': ('series', 'author', {str}, filter, all, filter), + }), + } From 87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 14:57:20 -0500 Subject: [PATCH 32/81] [ie/mlbtv] Make formats downloadable with ffmpeg (#13761) Authored by: bashonly --- yt_dlp/extractor/mlb.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index 562b93fc78..b2b35a7121 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -457,12 +457,9 @@ mutation initPlaybackSession( self.report_warning(f'No formats available for {format_id} broadcast; skipping') return [], {} - cdn_headers = {'x-cdn-token': token} fmts, subs = self._extract_m3u8_formats_and_subtitles( - m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', - m3u8_id=format_id, fatal=False, headers=cdn_headers) + m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) for fmt in fmts: - fmt['http_headers'] = cdn_headers fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) fmt.setdefault('language', language) if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': From 790c286ce3e0b534ca2d8f6648ced220d888f139 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 21 Jul 2025 04:00:44 +0800 Subject: [PATCH 33/81] [ie/10play] Support new site domain (#13611) Closes #13577 Authored by: Georift --- yt_dlp/extractor/tenplay.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 825da6516b..dd4ea56580 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -7,11 +7,11 @@ from ..utils import int_or_none, traverse_obj, url_or_none, urljoin class TenPlayIE(InfoExtractor): IE_NAME = '10play' - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' _NETRC_MACHINE = '10play' _TESTS = [{ # Geo-restricted to Australia - 'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', + 'url': 'https://10.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', 'info_dict': { 'id': '7440980000013868', 'ext': 'mp4', @@ -32,7 +32,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia - 'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', + 'url': 
'https://10.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', 'info_dict': { 'id': '9000000000091177', 'ext': 'mp4', @@ -55,7 +55,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback - 'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', + 'url': 'https://10.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', 'info_dict': { 'id': '9000000000084116', 'ext': 'mp4', @@ -77,6 +77,7 @@ class TenPlayIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, 'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'], + 'skip': 'video unavailable', }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, @@ -96,7 +97,7 @@ class TenPlayIE(InfoExtractor): def _real_extract(self, url): content_id = self._match_id(url) data = self._download_json( - 'https://10play.com.au/api/v1/videos/' + content_id, content_id) + 'https://10.com.au/api/v1/videos/' + content_id, content_id) video_data = self._download_json( f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}', @@ -137,21 +138,24 @@ class TenPlayIE(InfoExtractor): class TenPlaySeasonIE(InfoExtractor): IE_NAME = '10play:season' - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' _TESTS = [{ - 'url': 'https://10play.com.au/masterchef/episodes/season-15', + 'url': 'https://10.com.au/masterchef/episodes/season-15', 'info_dict': { 'title': 'Season 15', 'id': 'MTQ2NjMxOQ==', }, 'playlist_mincount': 50, }, { - 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'url': 'https://10.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', 'info_dict': { 'title': 'Season 2024', 'id': 'Mjc0OTIw', }, 'playlist_mincount': 159, + }, { + 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'only_matching': True, }] def _entries(self, load_more_url, display_id=None): @@ -172,7 +176,7 @@ class TenPlaySeasonIE(InfoExtractor): def _real_extract(self, url): show, season = self._match_valid_url(url).group('show', 'season') season_info = self._download_json( - f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') + f'https://10.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') episodes_carousel = traverse_obj(season_info, ( 'content', 0, 'components', ( From f9dff95cb1c138913011417b3bba020c0a691bba Mon Sep 17 00:00:00 2001 From: WouterGordts Date: Sun, 20 Jul 2025 22:12:40 +0200 Subject: [PATCH 34/81] [ie/bandcamp] Extract tags (#13480) Authored by: WouterGordts --- yt_dlp/extractor/bandcamp.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 939c2800e6..d07d6e48b2 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, + clean_html, extract_attributes, float_or_none, int_or_none, @@ -19,7 +20,7 @@ from ..utils import ( url_or_none, urljoin, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import 
find_element, find_elements, traverse_obj class BandcampIE(InfoExtractor): @@ -70,6 +71,9 @@ class BandcampIE(InfoExtractor): 'album': 'FTL: Advanced Edition Soundtrack', 'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_id': 'benprunty', + 'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'], + 'artists': ['Ben Prunty'], + 'album_artists': ['Ben Prunty'], }, }, { # no free download, mp3 128 @@ -94,6 +98,9 @@ class BandcampIE(InfoExtractor): 'album': 'Call of the Mastodon', 'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_id': 'relapsealumni', + 'tags': ['Philadelphia'], + 'artists': ['Mastodon'], + 'album_artists': ['Mastodon'], }, }, { # track from compilation album (artist/album_artist difference) @@ -118,6 +125,9 @@ class BandcampIE(InfoExtractor): 'album': 'DSK F/W 2016-2017 Free Compilation', 'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_id': 'diskotopia', + 'tags': ['Japan'], + 'artists': ['submerse'], + 'album_artists': ['Diskotopia'], }, }] @@ -252,6 +262,7 @@ class BandcampIE(InfoExtractor): 'album': embed.get('album_title'), 'album_artist': album_artist, 'formats': formats, + 'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})), } From 32809eb2da92c649e540a5b714f6235036026161 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:05:43 -0500 Subject: [PATCH 35/81] Allow extractors to designate formats/subtitles for impersonation (#13778) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 37 ++++++++++++++++++++++++++++++++++- yt_dlp/downloader/__init__.py | 2 +- yt_dlp/downloader/http.py | 5 ++++- yt_dlp/extractor/common.py | 30 ++++++++++++---------------- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9c9ee64a8c..68074a5626 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -52,7 +52,7 @@ from .networking.exceptions import ( SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -3231,6 +3231,16 @@ class YoutubeDL: } else: params = self.params + + impersonate = info.pop('impersonate', None) + # Do not override --impersonate with extractor-specified impersonation + if params.get('impersonate') is None: + available_target, requested_targets = self._parse_impersonate_targets(impersonate) + if available_target: + info['impersonate'] = available_target + elif requested_targets: + self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -4183,6 +4193,31 @@ class YoutubeDL: for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def 
_unavailable_targets_message(requested_targets, note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 9c34bd289a..17458b9b94 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 90bfcaf552..073860f6f9 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -27,6 +27,9 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + if info_dict.get('impersonate') is not None: + request_extensions['impersonate'] = info_dict['impersonate'] class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +112,7 @@ class HttpFD(FileDownloader): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d601e17514..8a914abf0b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -38,7 +38,6 @@ from ..networking.exceptions import ( TransportError, network_exceptions, ) -from ..networking.impersonate import ImpersonateTarget from ..utils import ( IDENTITY, JSON_LD_RE, @@ -259,6 +258,11 @@ class InfoExtractor: * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored * iv The IV (as hex) used to decrypt fragments + * impersonate Impersonate target(s). Can be any of the following entities: + * an instance of yt_dlp.networking.impersonate.ImpersonateTarget + * a string in the format of CLIENT[:OS] + * a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances + * a boolean value; True means any impersonate target is sufficient * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -336,6 +340,7 @@ class InfoExtractor: * "name": Name or description of the subtitles * "http_headers": A dictionary of additional HTTP headers to add to the request. 
+ * "impersonate": Impersonate target(s); same as the "formats" field "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated captions instead of normal subtitles @@ -884,26 +889,17 @@ class InfoExtractor: extensions = {} - if impersonate in (True, ''): - impersonate = ImpersonateTarget() - requested_targets = [ - t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) - for t in variadic(impersonate) - ] if impersonate else [] - - available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None) + available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate) if available_target: extensions['impersonate'] = available_target elif requested_targets: - message = 'The extractor is attempting impersonation, but ' - message += ( - 'no impersonate target is available' if not str(impersonate) - else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"') - info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation ' - 'for information on installing the required dependencies') + msg = 'The extractor is attempting impersonation' if require_impersonation: - raise ExtractorError(f'{message}; {info_msg}', expected=True) - self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) + raise ExtractorError( + self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True), + expected=True) + self.report_warning( + self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True) try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) From a4561c7a66c39d88efe7ae51e7fa1986faf093fb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:20:58 -0500 Subject: [PATCH 36/81] [rh:requests] Refactor default headers (#13785) Authored by: bashonly --- yt_dlp/networking/_requests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 555c21ac33..6582038fcb 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -313,7 +313,7 @@ class RequestsRH(RequestHandler, InstanceStoreMixin): max_retries=urllib3.util.retry.Retry(False), ) session.adapters.clear() - session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'}) + session.headers = requests.models.CaseInsensitiveDict() session.mount('https://', http_adapter) session.mount('http://', http_adapter) session.cookies = cookiejar @@ -322,6 +322,7 @@ class RequestsRH(RequestHandler, InstanceStoreMixin): def _prepare_headers(self, _, headers): add_accept_encoding_header(headers, SUPPORTED_ENCODINGS) + headers.setdefault('Connection', 'keep-alive') def _send(self, request): From 8820101aa3152e5f4811541c645f8b5de231ba8c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:22:04 -0500 Subject: [PATCH 37/81] [ie/youtube] Use impersonation for downloading subtitles (#13786) Closes #13770 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index fc1f087ace..5968edc60e 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -4056,6 +4056,7 @@ 
class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': fmt, 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)), 'name': sub_name, + 'impersonate': True, STREAMING_DATA_CLIENT_NAME: client_name, }) From 2ac3eb98373d1c31341c5e918c83872c7ff409c6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:41:00 -0500 Subject: [PATCH 38/81] Fix `ImpersonateTarget` sanitization (#13791) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 68074a5626..14beb3df98 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3716,6 +3716,8 @@ class YoutubeDL: return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: From 3e49bc8a1bdb4109b857f2c361c358e86fa63405 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:42:21 -0500 Subject: [PATCH 39/81] Make extractor-designated impersonation override `--impersonate` (#13792) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 9 --------- yt_dlp/downloader/common.py | 11 +++++++++++ yt_dlp/downloader/http.py | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 14beb3df98..e42fa73dd6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3232,15 +3232,6 @@ class YoutubeDL: else: params = self.params - impersonate = info.pop('impersonate', None) - # Do not override --impersonate with extractor-specified impersonation - if params.get('impersonate') is None: - available_target, requested_targets = self._parse_impersonate_targets(impersonate) - if available_target: - info['impersonate'] = available_target - elif requested_targets: - self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) - fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb9303f8a1..7bc70a51a2 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -495,3 +495,14 @@ class FileDownloader: exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 073860f6f9..c388deb7ea 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -28,8 +28,9 @@ class HttpFD(FileDownloader): url = info_dict['url'] request_data = info_dict.get('request_data', None) request_extensions = {} - if info_dict.get('impersonate') is not None: - request_extensions['impersonate'] = info_dict['impersonate'] + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not 
None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get From ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 14:09:52 -0500 Subject: [PATCH 40/81] [ie/hotstar] Fix error handling (#13793) Fix 7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9 Closes #13790 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index b280fb53ab..2ae527a59e 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -42,6 +42,7 @@ class HotStarBaseIE(InfoExtractor): } def _has_active_subscription(self, cookies, server_time): + server_time = int_or_none(server_time) or int(time.time()) expiry = traverse_obj(cookies, ( self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads}, 'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0 From 6be26626f7cfa71d28e0fac2861eb04758810c5d Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 06:59:13 +0900 Subject: [PATCH 41/81] [utils] `unified_timestamp`: Return `int` values (#13796) Authored by: doe1080 --- yt_dlp/utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c91a06e9a6..7d79f417fa 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1285,7 +1285,7 @@ def unified_timestamp(date_str, day_first=True): timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() + return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds()) @partial_application From 060c6a4501a0b8a92f1b9c12788f556d902c83c6 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 07:32:10 +0900 Subject: [PATCH 42/81] [ie/skeb] Rework extractor (#13593) Closes #7440 Authored by: doe1080 --- yt_dlp/extractor/skeb.py | 190 +++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 106 deletions(-) diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index bc5ec3da7f..70111d0944 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -1,140 +1,118 @@ from .common import InfoExtractor -from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj class SkebIE(InfoExtractor): - _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P\d+)' - + _VALID_URL = r'https?://skeb\.jp/@(?P[^/?#]+)/works/(?P\d+)' _TESTS = [{ 'url': 'https://skeb.jp/@riiru_wm/works/10', 'info_dict': { 'id': '466853', - 'title': '内容はおまかせします! 
by 姫ノ森りぃる@一周年', + 'ext': 'mp4', + 'title': '10-1', 'description': 'md5:1ec50901efc3437cfbfe3790468d532d', - 'uploader': '姫ノ森りぃる@一周年', - 'uploader_id': 'riiru_wm', - 'age_limit': 0, - 'tags': [], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'width': 720, - 'height': 405, 'duration': 313, - 'fps': 30, - 'ext': 'mp4', + 'genres': ['video'], + 'thumbnail': r're:https?://.+', + 'uploader': '姫ノ森りぃる@ひとづま', + 'uploader_id': 'riiru_wm', }, }, { 'url': 'https://skeb.jp/@furukawa_nob/works/3', 'info_dict': { 'id': '489408', - 'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...', - 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2', - 'uploader': '古川ノブ@音楽とVlogのVtuber', - 'uploader_id': 'furukawa_nob', - 'age_limit': 0, - 'tags': [ - 'よろしく', '大丈夫', 'お願い', 'でした', - '是非', 'O', 'バー', '遊び', 'おはよう', - 'オーバ', 'ボイス', - ], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'duration': 98, 'ext': 'mp3', - 'vcodec': 'none', - 'abr': 128, + 'title': '3-1', + 'description': 'md5:6de1f8f876426a6ac321c123848176a8', + 'duration': 98, + 'genres': ['voice'], + 'tags': 'count:11', + 'thumbnail': r're:https?://.+', + 'uploader': '古川ノブ@宮城の動画勢Vtuber', + 'uploader_id': 'furukawa_nob', }, }, { - 'url': 'https://skeb.jp/@mollowmollow/works/6', + 'url': 'https://skeb.jp/@Rizu_panda_cube/works/626', 'info_dict': { - 'id': '6', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - '_type': 'playlist', - 'entries': [{ - 'id': '486430', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - }, { - 'id': '486431', - 'title': 'ヒロ。\n\n私のキャラク... 
by 諸々', - }], + 'id': '626', + 'description': 'md5:834557b39ca56960c5f77dd6ddabe775', + 'uploader': 'りづ100億%', + 'uploader_id': 'Rizu_panda_cube', + 'tags': 'count:57', + 'genres': ['video'], }, + 'playlist_count': 2, + 'expected_warnings': ['Skipping unsupported extension'], }] + def _call_api(self, uploader_id, work_id): + return self._download_json( + f'https://skeb.jp/api/users/{uploader_id}/works/{work_id}', work_id, headers={ + 'Accept': 'application/json', + 'Authorization': 'Bearer null', + }) + def _real_extract(self, url): - video_id = self._match_id(url) - nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id) + uploader_id, work_id = self._match_valid_url(url).group('uploader_id', 'id') + try: + works = self._call_api(uploader_id, work_id) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status != 429: + raise + webpage = e.cause.response.read().decode() + value = self._search_regex( + r'document\.cookie\s*=\s*["\']request_key=([^;"\']+)', webpage, 'request key') + self._set_cookie('skeb.jp', 'request_key', value) + works = self._call_api(uploader_id, work_id) - parent = { - 'id': video_id, - 'title': nuxt_data.get('title'), - 'description': nuxt_data.get('description'), - 'uploader': traverse_obj(nuxt_data, ('creator', 'name')), - 'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')), - 'age_limit': 18 if nuxt_data.get('nsfw') else 0, - 'tags': nuxt_data.get('tag_list'), + info = { + 'uploader_id': uploader_id, + **traverse_obj(works, { + 'age_limit': ('nsfw', {bool}, {lambda x: 18 if x else None}), + 'description': (('source_body', 'body'), {clean_html}, filter, any), + 'genres': ('genre', {str}, filter, all, filter), + 'tags': ('tag_list', ..., {str}, filter, all, filter), + 'uploader': ('creator', 'name', {str}), + }), } entries = [] - for item in nuxt_data.get('previews') or []: - vid_url = item.get('url') - given_ext = traverse_obj(item, ('information', 'extension')) - preview_ext = determine_ext(vid_url, default_ext=None) - if not preview_ext: - content_disposition = parse_qs(vid_url)['response-content-disposition'][0] - preview_ext = self._search_regex( - r'filename="[^"]+\.([^\.]+?)"', content_disposition, - 'preview file extension', fatal=False, group=1) - if preview_ext not in ('mp4', 'mp3'): + for idx, preview in enumerate(traverse_obj(works, ('previews', lambda _, v: url_or_none(v['url']))), 1): + ext = traverse_obj(preview, ('information', 'extension', {str})) + if ext not in ('mp3', 'mp4'): + self.report_warning(f'Skipping unsupported extension "{ext}"') continue - if not vid_url or not item.get('id'): - continue - width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height')) - if width is not None and height is not None: - # the longest side is at most 720px for non-client viewers - max_size = max(width, height) - width, height = (x * 720 // max_size for x in (width, height)) + entries.append({ - **parent, - 'id': str(item['id']), - 'url': vid_url, - 'thumbnail': item.get('poster_url'), + 'ext': ext, + 'title': f'{work_id}-{idx}', 'subtitles': { - 'jpn': [{ - 'url': item.get('vtt_url'), + 'ja': [{ 'ext': 'vtt', + 'url': preview['vtt_url'], }], - } if item.get('vtt_url') else None, - 'width': width, - 'height': height, - 'duration': traverse_obj(item, ('information', 'duration')), - 'fps': traverse_obj(item, ('information', 'frame_rate')), - 'ext': preview_ext or given_ext, - 'vcodec': 'none' if preview_ext == 'mp3' else None, - # you'll always 
get 128kbps MP3 for non-client viewers - 'abr': 128 if preview_ext == 'mp3' else None, + } if url_or_none(preview.get('vtt_url')) else None, + 'vcodec': 'none' if ext == 'mp3' else None, + **info, + **traverse_obj(preview, { + 'id': ('id', {str_or_none}), + 'thumbnail': ('poster_url', {url_or_none}), + 'url': ('url', {url_or_none}), + }), + **traverse_obj(preview, ('information', { + 'duration': ('duration', {int_or_none}), + 'fps': ('frame_rate', {int_or_none}), + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + })), }) - if not entries: - raise ExtractorError('No video/audio attachment found in this commission.', expected=True) - elif len(entries) == 1: - return entries[0] - else: - parent.update({ - '_type': 'playlist', - 'entries': entries, - }) - return parent + return self.playlist_result(entries, work_id, **info) From d3edc5d52a7159eda2331dbc7e14bf40a6585c81 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:04:43 -0400 Subject: [PATCH 43/81] [ie/bilibili] Pass newer user-agent with API requests (#13736) Closes #12887 Authored by: c-basalt --- yt_dlp/extractor/bilibili.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc72..3282a11bb7 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,6 +175,13 @@ class BilibiliBaseIE(InfoExtractor): else: note = f'Downloading video formats for cid {cid}' + # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 + # playurl requests carrying old UA will be rejected + headers = { + 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', + **(headers or {}), + } + return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] @@ -353,7 +360,7 @@ class BiliBiliIE(BilibiliBaseIE): 'id': 'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的', }, - 'playlist_count': 18, + 'playlist_count': 23, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', @@ -373,6 +380,7 @@ class BiliBiliIE(BilibiliBaseIE): '_old_archive_ids': ['bilibili 498159642_part1'], }, }], + 'params': {'playlist_items': '2'}, }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -1002,6 +1010,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1057,6 +1066,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1847,7 +1857,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1564836614, 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', + 'uploader': '十六夜tsukimiつきみぐ', 'view_count': int, }, } @@ -1902,10 +1912,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): 'url': 'https://www.bilibili.com/audio/am10624', 'info_dict': { 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', + 'title': '新曲推荐', 'description': '每天11:00更新,为你推送最新音乐', }, - 'playlist_count': 19, + 'playlist_count': 16, } def _real_extract(self, url): From b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7 Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:15:31 -0500 Subject: [PATCH 45/81] [ie/patreon:campaign] Fix extractor (#13712) Closes #13622 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 2c1436cac1..9038b4a7ff 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -19,7 +19,7 @@ from ..utils import ( url_or_none, urljoin, ) -from ..utils.traversal import traverse_obj, value +from ..utils.traversal import require, traverse_obj, value class PatreonBaseIE(InfoExtractor): @@ -462,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?patreon\.com/(?: (?:m|api/campaigns)/(?P<campaign_id>\d+)| - (?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+) + (?:cw?/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+) )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', @@ -531,6 +531,28 @@ class 
PatreonCampaignIE(PatreonBaseIE): 'age_limit': 0, }, 'playlist_mincount': 331, + 'skip': 'Channel removed', + }, { + # next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622 + 'url': 'https://www.patreon.com/c/anythingelse/posts', + 'info_dict': { + 'id': '9631148', + 'title': 'Anything Else?', + 'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08', + 'uploader': 'dan ', + 'uploader_id': '13852412', + 'uploader_url': 'https://www.patreon.com/anythingelse', + 'channel': 'Anything Else?', + 'channel_id': '9631148', + 'channel_url': 'https://www.patreon.com/anythingelse', + 'channel_follower_count': int, + 'age_limit': 0, + 'thumbnail': r're:https?://.+/.+', + }, + 'playlist_mincount': 151, + }, { + 'url': 'https://www.patreon.com/cw/anythingelse', + 'only_matching': True, }, { 'url': 'https://www.patreon.com/c/OgSog/posts', 'only_matching': True, @@ -572,8 +594,11 @@ class PatreonCampaignIE(PatreonBaseIE): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) - campaign_id = self._search_nextjs_data( - webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] + campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), ( + 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) + if not campaign_id: + campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( + lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', From 959ac99e98c3215437e573c22d64be42d361e863 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 15 Jul 2025 01:17:34 +0200 Subject: [PATCH 46/81] Fix `--exec` placeholder expansion on Windows See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56 for more details Authored by: Grub4K --- yt_dlp/postprocessor/exec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 1f0a0015ec..243487dd25 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -18,7 +18,7 @@ class ExecPP(PostProcessor): if filepath: if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shell_quote(filepath)) + cmd = cmd.replace('{}', shell_quote(filepath, shell=True)) return cmd def run(self, info): From 9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 22 Jul 2025 01:43:30 +0200 Subject: [PATCH 47/81] [cleanup] Misc (#13595) Closes #10853, Closes #12436, Closes #13314, Closes #13609 Authored by: seproDev, InvalidUsernameException, doe1080, hseg, bashonly, adamralph Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: InvalidUsernameException Co-authored-by: gesh Co-authored-by: Adam Ralph Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- CONTRIBUTING.md | 2 +- README.md | 6 +++--- devscripts/changelog_override.json | 10 ++++++++++ test/test_download.py | 4 ---- yt_dlp/YoutubeDL.py | 1 + yt_dlp/extractor/common.py | 5 ++++- yt_dlp/extractor/mirrativ.py | 2 +- yt_dlp/extractor/newspicks.py | 2 -- yt_dlp/extractor/youtube/_video.py | 4 ++-- 9 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7b0f1210..2c58cdfc94 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,7 +126,7 
@@ By sharing an account with anyone, you agree to bear all risks associated with i While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. -- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password before sharing the account to something random. - Change the password after receiving the account back. ### Is the website primarily used for piracy? diff --git a/README.md b/README.md index 925ebd8c5b..7a6d1073f4 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git yt-dlp [OPTIONS] [--] URL [URL...] -`Ctrl+F` is your friend :D +Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords @@ -1902,8 +1902,8 @@ The following extractors use this feature: * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies -* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d7296bf309..c22ea94bfc 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -262,5 +262,15 @@ { "action": "remove", "when": "500761e41acb96953a5064e951d41d190c287e46" + }, + { + "action": "add", + "when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3", + "short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)" + }, + { + "action": "add", + "when": "959ac99e98c3215437e573c22d64be42d361e863", + "short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped" } ] diff --git a/test/test_download.py b/test/test_download.py index c7842735c2..1714cb52ec 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -66,10 +66,6 @@ tests_counter = collections.defaultdict(collections.Counter) @is_download_test class TestDownload(unittest.TestCase): - # Parallel testing in nosetests. See - # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html - _multiprocess_shared_ = True - maxDiff = None COMPLETED_TESTS = {} diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e42fa73dd6..76fd18c338 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -529,6 +529,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 8a914abf0b..4a4b5416d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -397,6 +397,8 @@ class InfoExtractor: chapters: A list of dictionaries, with the following entries: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds + (optional: core code can determine this value from + the next chapter's start_time or the video's duration) * "title" (optional, string) heatmap: A list of dictionaries, with the following entries: * "start_time" - The start time of the data point in seconds @@ -411,7 +413,8 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" - _old_archive_ids: A list of old archive ids needed for backward compatibility + _old_archive_ids: A list of old archive ids needed for backward + compatibility. Use yt_dlp.utils.make_archive_id to generate ids _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. 
The function diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 4e24371a22..36a736a21d 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -18,7 +18,7 @@ class MirrativIE(MirrativBaseIE): IE_NAME = 'mirrativ' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P[^/?#&]+)' - TESTS = [{ + _TESTS = [{ 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw', 'info_dict': { 'id': 'UQomuS7EMgHoxRHjEhNiHw', diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py index 5f19eed984..25be3c7203 100644 --- a/yt_dlp/extractor/newspicks.py +++ b/yt_dlp/extractor/newspicks.py @@ -18,7 +18,6 @@ class NewsPicksIE(InfoExtractor): 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', 'cast': 'count:4', 'description': 'md5:09397aad46d6ded6487ff13f138acadf', - 'duration': 2940, 'release_date': '20220117', 'release_timestamp': 1642424400, 'series': 'HORIE ONE', @@ -35,7 +34,6 @@ class NewsPicksIE(InfoExtractor): 'title': '【検証】専門家は、KADOKAWAをどう見るか', 'cast': 'count:3', 'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d', - 'duration': 1320, 'release_date': '20240622', 'release_timestamp': 1719088080, 'series': 'NPレポート', diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 5968edc60e..171aa9b5c4 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2076,7 +2076,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): assert os.path.basename(func_id) == func_id self.write_debug(f'Extracting signature function {func_id}') - cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.31'), None + cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None if not cache_spec: code = self._load_player(video_id, player_url) @@ -2180,7 +2180,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if data := self._player_cache.get(cache_id): return data - data = self.cache.load(*cache_id, min_ver='2025.03.31') + data = self.cache.load(*cache_id, min_ver='2025.07.21') if data: self._player_cache[cache_id] = data From 035b1ece8f382358f5503bf5011ca098f6c9eaf9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:47:12 +0000 Subject: [PATCH 48/81] Release 2025.07.21 Created by: bashonly :ci skip all --- CONTRIBUTORS | 9 +++++ Changelog.md | 91 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 22 ++++++------ yt_dlp/version.py | 6 ++-- 5 files changed, 116 insertions(+), 16 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ba23b66dc5..f20b4ce172 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -784,3 +784,12 @@ eason1478 ceandreasen chauhantirth helpimnotdrowning +adamralph +averageFOSSenjoyer +bubo +flanter21 +Georift +moonshinerd +R0hanW +ShockedPlot7560 +swayll diff --git a/Changelog.md b/Changelog.md index 5a5c18cf34..7205b95aa3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,97 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.07.21 + +#### Important changes +- **Default behaviour changed from `--mtime` to `--no-mtime`** +yt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780) +- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56) + - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped + +#### Core changes +- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6)) +- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly) +- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K) +- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev) +- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly) +- **jsinterp** + - [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16)) + - [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **utils** + - `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly) + - `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080) + - `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec)) +- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) 
([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K) +- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift) +- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly) +- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas) +- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts) +- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly) + - [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly) +- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21) +- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo) +- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly) +- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Fix support for free 
accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth) + - [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d)) +- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka) +- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll) +- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560) +- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly) +- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080) +- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317) +- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly) +- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW) +- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev) +- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick) +- **skeb**: [Rework 
extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080) +- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly) +- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll) +- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly) +- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer) +- **vimeo** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly) + - [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz) + - [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly) + - [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz) + - [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly) + - tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **hls**: [Do not fall back to ffmpeg when native is 
required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler** + - requests + - [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly) + - [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102)) + +#### Misc. changes +- **cleanup** + - [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev) +- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick) +- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + ### 2025.06.30 #### Core changes diff --git a/README.md b/README.md index 7a6d1073f4..f1d119317c 100644 --- a/README.md +++ b/README.md @@ -639,9 +639,9 @@ Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords --no-part Do not use .part files - write directly into output file --mtime Use the Last-modified header to set the file - modification time (default) + modification time --no-mtime Do not use the Last-modified header to set - the file modification time + the file modification time (default) --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file diff --git a/supportedsites.md b/supportedsites.md index 8e48135d22..3e0bef4bcf 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -133,7 +133,6 @@ The only reliable way to check if a site is supported is to try it. - **BaiduVideo**: 百度视频 - **BanBye** - **BanByeChannel** - - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** @@ -157,7 +156,6 @@ The only reliable way to check if a site is supported is to try it. - **Beeg** - **BehindKink**: (**Currently broken**) - **Bellator** - - **BellMedia** - **BerufeTV** - **Bet**: (**Currently broken**) - **bfi:player**: (**Currently broken**) @@ -197,6 +195,7 @@ The only reliable way to check if a site is supported is to try it. 
- **BitChute** - **BitChuteChannel** - **BlackboardCollaborate** + - **BlackboardCollaborateLaunch** - **BleacherReport**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**) - **blerp** @@ -225,6 +224,7 @@ The only reliable way to check if a site is supported is to try it. - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BTVPlus** - **Bundesliga** - **Bundestag** - **BunnyCdn** @@ -317,7 +317,6 @@ The only reliable way to check if a site is supported is to try it. - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** @@ -652,7 +651,6 @@ The only reliable way to check if a site is supported is to try it. - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -723,9 +721,6 @@ The only reliable way to check if a site is supported is to try it. - **life:embed** - **likee** - **likee:user** - - **limelight** - - **limelight:channel** - - **limelight:channel_list** - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") @@ -807,6 +802,7 @@ The only reliable way to check if a site is supported is to try it. - **minds:channel** - **minds:group** - **Minoto** + - **mir24.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -817,6 +813,8 @@ The only reliable way to check if a site is supported is to try it. - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** + - **Mixlr** + - **MixlrRecoring** - **MLB** - **MLBArticle** - **MLBTV**: [*mlb*](## "netrc machine") @@ -973,7 +971,6 @@ The only reliable way to check if a site is supported is to try it. - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - - **Noovo** - **NOSNLArticle** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -1097,6 +1094,7 @@ The only reliable way to check if a site is supported is to try it. - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** + - **PlayerFm** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") @@ -1472,11 +1470,12 @@ The only reliable way to check if a site is supported is to try it. - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TF1** - - **TFO** + - **TFO**: (**Currently broken**) - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** + - **TheHighWire** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1544,8 +1543,8 @@ The only reliable way to check if a site is supported is to try it. - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - - **tv5unis** - - **tv5unis:video** + - **tv5unis**: (**Currently broken**) + - **tv5unis:video**: (**Currently broken**) - **tv8.it** - **tv8.it:live**: TV8 Live - **tv8.it:playlist**: TV8 Playlist @@ -1600,6 +1599,7 @@ The only reliable way to check if a site is supported is to try it. 
  - **UlizaPortal**: ulizaportal.jp
  - **umg:de**: Universal Music Deutschland
  - **Unistra**
+ - **UnitedNationsWebTv**
  - **Unity**: (**Currently broken**)
  - **uol.com.br**
  - **uplynk**
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 451fee7164..868429ffb2 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
 # Autogenerated by devscripts/update-version.py
-__version__ = '2025.06.30'
+__version__ = '2025.07.21'
-RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86'
+RELEASE_GIT_HEAD = '9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e'
 VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
 ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2025.06.30'
+_pkg_version = '2025.07.21'

From 3e918d825d7ff367812658957b281b8cda8f9ebb Mon Sep 17 00:00:00 2001
From: Roland Crosby
Date: Tue, 22 Jul 2025 13:50:42 -0400
Subject: [PATCH 49/81] [pp/XAttrMetadata] Add macOS "Where from" attribute
 (#12664)

Authored by: rolandcrosby
---
 yt_dlp/postprocessor/xattrpp.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
index e486b797b7..fd83d783ba 100644
--- a/yt_dlp/postprocessor/xattrpp.py
+++ b/yt_dlp/postprocessor/xattrpp.py
@@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor):
         # (e.g., 4kB on ext4), and we don't want to have the other ones fail
         'user.dublincore.description': 'description',
         # 'user.xdg.comment': 'description',
+        'com.apple.metadata:kMDItemWhereFroms': 'webpage_url',
     }

+    APPLE_PLIST_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+\t<string>%s</string>
+</array>
+</plist>'''
+
     def run(self, info):
         mtime = os.stat(info['filepath']).st_mtime
         self.to_screen('Writing metadata to file\'s xattrs')
@@ -44,6 +53,8 @@ class XAttrMetadataPP(PostProcessor):
                 if value:
                     if infoname == 'upload_date':
                         value = hyphenate_date(value)
+                    elif xattrname == 'com.apple.metadata:kMDItemWhereFroms':
+                        value = self.APPLE_PLIST_TEMPLATE % value

                     write_xattr(info['filepath'], xattrname, value.encode())

         except XAttrUnavailableError as e:

From eed94c7306d4ecdba53ad8783b1463a9af5c97f1 Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Tue, 22 Jul 2025 20:10:51 +0200
Subject: [PATCH 50/81] [utils] Add `WINDOWS_VT_MODE` to globals (#12460)

Authored by: Grub4K
---
 test/test_compat.py | 3 ---
 yt_dlp/YoutubeDL.py | 4 ++--
 yt_dlp/compat/_legacy.py | 2 +-
 yt_dlp/globals.py | 2 ++
 yt_dlp/utils/_utils.py | 10 +++-------
 5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index b1cc2a8187..3aa9c0c518 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -21,9 +21,6 @@ class TestCompat(unittest.TestCase):
         with self.assertWarns(DeprecationWarning):
             _ = compat.compat_basestring
-        with self.assertWarns(DeprecationWarning):
-            _ = compat.WINDOWS_VT_MODE
-
         self.assertEqual(urllib.request.getproxies, getproxies)
         with self.assertWarns(DeprecationWarning):
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 76fd18c338..a9f347bf4a 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -36,6 +36,7 @@ from .extractor.openload import PhantomJSwrapper
 from .globals import (
     IN_CLI,
     LAZY_EXTRACTORS,
+    WINDOWS_VT_MODE,
     plugin_ies,
     plugin_ies_overrides,
     plugin_pps,
@@ -4040,8 +4041,7 @@ class YoutubeDL:
         if os.environ.get('TERM', '').lower() == 'dumb':
             additional_info.append('dumb')
         if not supports_terminal_sequences(stream):
-            from .utils import WINDOWS_VT_MODE # Must be imported locally
-            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
+            additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No 
ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dae2c14592..2f3e35d4a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/globals.py b/yt_dlp/globals.py index 0cf276cc9e..81ad004480 100644 --- a/yt_dlp/globals.py +++ b/yt_dlp/globals.py @@ -1,3 +1,4 @@ +import os from collections import defaultdict # Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system, @@ -28,3 +29,4 @@ plugin_ies_overrides = Indirect(defaultdict(list)) # Misc IN_CLI = Indirect(False) LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled +WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 7d79f417fa..1cb62712ba 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -52,7 +52,7 @@ from ..compat import ( compat_HTMLParseError, ) from ..dependencies import xattr -from ..globals import IN_CLI +from ..globals import IN_CLI, WINDOWS_VT_MODE __name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module @@ -4759,13 +4759,10 @@ def jwt_decode_hs256(jwt): return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) -WINDOWS_VT_MODE = False if os.name == 'nt' else None - - @functools.cache def supports_terminal_sequences(stream): if os.name == 'nt': - if not WINDOWS_VT_MODE: + if not WINDOWS_VT_MODE.value: return False elif not os.getenv('TERM'): return False @@ -4802,8 +4799,7 @@ def windows_enable_vt_mode(): finally: os.close(handle) - global WINDOWS_VT_MODE - WINDOWS_VT_MODE = True + WINDOWS_VT_MODE.value = True supports_terminal_sequences.cache_clear() From c59ad2b066bbccd3cc4eed580842f961bce7dd4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:34:03 -0500 Subject: [PATCH 51/81] [utils] `random_user_agent`: Bump versions (#13543) Closes #5362 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 8 ++---- yt_dlp/extractor/bilibili.py | 7 ----- yt_dlp/extractor/francaisfacile.py | 13 +-------- yt_dlp/extractor/mitele.py | 2 +- yt_dlp/extractor/sproutvideo.py | 2 +- yt_dlp/extractor/telecinco.py | 13 +-------- yt_dlp/utils/networking.py | 46 +++--------------------------- 7 files changed, 10 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d9340..eb45734ec0 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2846702f6a..d00ac63176 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,13 +175,6 @@ class BilibiliBaseIE(InfoExtractor): else: note = f'Downloading video formats for cid {cid}' - # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 - # playurl requests carrying old UA will be rejected - headers = { - 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', - **(headers or {}), - } - return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] diff --git a/yt_dlp/extractor/francaisfacile.py b/yt_dlp/extractor/francaisfacile.py index d3208c2828..c432cf486c 100644 --- a/yt_dlp/extractor/francaisfacile.py +++ b/yt_dlp/extractor/francaisfacile.py @@ -1,9 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, float_or_none, url_or_none, ) @@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor): def _real_extract(self, url): display_id = urllib.parse.unquote(self._match_id(url)) - - try: # yt-dlp's default user-agents are too old and blocked by the site - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient - webpage = self._download_webpage(url, display_id, impersonate=True) + webpage = self._download_webpage(url, display_id) data = self._search_json( r']+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 0dded38c65..76fef337a2 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 494042738d..4afa838715 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -99,7 +99,7 @@ class SproutVideoIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 2dbe2a7768..a34f2afd4a 100644 --- a/yt_dlp/extractor/telecinco.py +++ 
b/yt_dlp/extractor/telecinco.py @@ -63,17 +63,6 @@ class TelecincoBaseIE(InfoExtractor): 'http_headers': headers, } - def _download_akamai_webpage(self, url, display_id): - try: # yt-dlp's default user-agents are too old and blocked by akamai - return self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - return self._download_webpage(url, display_id, impersonate=True) - class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article'] diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 9fcab6456f..467312ce75 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -15,48 +15,10 @@ from .traversal import traverse_obj def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '90.0.4430.212', - '90.0.4430.24', - '90.0.4430.70', - '90.0.4430.72', - '90.0.4430.85', - '90.0.4430.93', - '91.0.4472.101', - '91.0.4472.106', - '91.0.4472.114', - '91.0.4472.124', - '91.0.4472.164', - '91.0.4472.19', - '91.0.4472.77', - '92.0.4515.107', - '92.0.4515.115', - '92.0.4515.131', - '92.0.4515.159', - '92.0.4515.43', - '93.0.4556.0', - '93.0.4577.15', - '93.0.4577.63', - '93.0.4577.82', - '94.0.4606.41', - '94.0.4606.54', - '94.0.4606.61', - '94.0.4606.71', - '94.0.4606.81', - '94.0.4606.85', - '95.0.4638.17', - '95.0.4638.50', - '95.0.4638.54', - '95.0.4638.69', - '95.0.4638.74', - '96.0.4664.18', - '96.0.4664.45', - '96.0.4664.55', - '96.0.4664.93', - '97.0.4692.20', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' + # Target versions released within the last ~6 months + CHROME_MAJOR_VERSION_RANGE = (132, 138) + return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') class HTTPHeaderDict(dict): From 59765ecbc08d18005de7143fbb1d1caf90239471 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:46:46 -0500 Subject: [PATCH 52/81] [ie/sproutvideo] Fix extractor (#13813) Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 4afa838715..ff9dc7dee2 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -101,8 +101,8 @@ class SproutVideoIE(InfoExtractor): webpage = self._download_webpage( url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( - r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, - contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 
'player info', + video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] From 7e3f48d64d237281a97b3df1a61980c78a0302fe Mon Sep 17 00:00:00 2001 From: Atsushi2965 <142886283+atsushi2965@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:55:00 +0900 Subject: [PATCH 53/81] [pp/EmbedThumbnail] Fix ffmpeg args for embedding in mp3 (#13720) Authored by: atsushi2965 --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d8ba220cab..39e8826c6f 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,7 +90,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + '-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)'] self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) From afaf60d9fd5a0c7a85aeb1374fd97fbc13cd652c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:27:20 -0500 Subject: [PATCH 54/81] [ie/vimeo] Fix login support and require authentication (#13823) Closes #13822 Authored by: bashonly --- README.md | 2 +- yt_dlp/extractor/vimeo.py | 59 +++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f1d119317c..e5bd21b9ca 100644 --- a/README.md +++ b/README.md @@ -1902,7 +1902,7 @@ The following extractors use this feature: * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7ffe89f227..c45264bb52 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _DEFAULT_CLIENT = 'android' + _DEFAULT_CLIENT = 'web' _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', @@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): _CLIENT_CONFIGS = { 'android': { 'CACHE_KEY': 'oauth-token-android', - 'CACHE_ONLY': False, + 'CACHE_ONLY': True, 'VIEWER_JWT': False, 'REQUIRES_AUTH': False, 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', @@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): ), }, 'web': { + 'CACHE_ONLY': False, 'VIEWER_JWT': True, 'REQUIRES_AUTH': True, 'USER_AGENT': None, @@ -142,7 +143,6 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'service': 'vimeo', 'token': viewer['xsrft'], } - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_webpage( self._LOGIN_URL, None, 'Logging in', @@ -151,16 +151,40 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Referer': self._LOGIN_URL, }) except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 418: + if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418): raise ExtractorError( 'Unable to log in: bad username or password', expected=True) raise ExtractorError('Unable to log in') + # Clear unauthenticated viewer info + self._viewer_info = None + def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._is_logged_in: + if self._is_logged_in: + return + + if self._LOGIN_REQUIRED: self.raise_login_required() + if self._DEFAULT_CLIENT != 'web': + return + + for client_name, client_config in self._CLIENT_CONFIGS.items(): + if not client_config['CACHE_ONLY']: + continue + + cache_key = client_config['CACHE_KEY'] + if cache_key not in self._oauth_tokens: + if token := self.cache.load(self._NETRC_MACHINE, cache_key): + self._oauth_tokens[cache_key] = token + + if self._oauth_tokens.get(cache_key): + self._DEFAULT_CLIENT = client_name + self.write_debug( + f'Found cached {client_name} token; using {client_name} as default API client') + return + def _get_video_password(self): password = self.get_param('videopassword') if password is None: @@ -200,9 +224,6 @@ class VimeoBaseInfoExtractor(InfoExtractor): if vimeo_config: return self._parse_json(vimeo_config, video_id) - def _set_vimeo_cookie(self, name, value): - self._set_cookie('vimeo.com', name, value) - def _parse_config(self, config, video_id): video_data = config['video'] video_title = video_data.get('title') @@ -363,22 +384,26 @@ class VimeoBaseInfoExtractor(InfoExtractor): return f'Bearer {self._oauth_tokens[cache_key]}' def _get_requested_client(self): - default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]: + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. ' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + self.write_debug( + f'Using {client} API client as specified by extractor argument', only_once=True) + return client - client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] - if client not in self._CLIENT_CONFIGS: - raise ExtractorError( - f'Unsupported API client "{client}" requested. 
' - f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + if self._is_logged_in: + return self._DEFAULT_AUTHED_CLIENT - return client + return self._DEFAULT_CLIENT def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): client = force_client or self._get_requested_client() client_config = self._CLIENT_CONFIGS[client] if client_config['REQUIRES_AUTH'] and not self._is_logged_in: - self.raise_login_required(f'The {client} client requires authentication') + self.raise_login_required(f'The {client} client only works when logged-in') return self._download_json( join_nonempty( @@ -1192,7 +1217,6 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', @@ -1589,7 +1613,6 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', From 0adeb1e54b2d7e95cd19999e71013877850f8f41 Mon Sep 17 00:00:00 2001 From: ischmidt20 Date: Thu, 24 Jul 2025 18:35:48 -0400 Subject: [PATCH 55/81] [ie/tbs] Fix truTV support (#9683) Closes #3400 Authored by: ischmidt20, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/tbs.py | 113 ++++++++++++++++++++++++++------ yt_dlp/extractor/trutv.py | 71 -------------------- yt_dlp/extractor/turner.py | 5 ++ 4 files changed, 97 insertions(+), 93 deletions(-) delete mode 100644 yt_dlp/extractor/trutv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 59a61e0604..1aa2927f8f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2166,7 +2166,6 @@ from .trtworld import TrtWorldIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 80534731e1..f8891671f1 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -5,45 +5,110 @@ from .turner import TurnerBaseIE from ..utils import ( float_or_none, int_or_none, + make_archive_id, strip_or_none, ) +from ..utils.traversal import traverse_obj class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com(?P/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P[^/?#]+))' + _SITE_INFO = { + 'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'), + 'tntdrama': ('TNT', 
'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'), + 'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'), + } + _VALID_URL = fr'''(?x) + https?://(?:www\.)?(?P{"|".join(map(re.escape, _SITE_INFO))})\.com + (?P/(?: + (?Pwatch(?:tnt|tbs|trutv))| + movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+) + )/(?P[^/?#]+)) + ''' _TESTS = [{ - 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life', 'info_dict': { - 'id': '8d384cde33b89f3a43ce5329de42903ed5099887', + 'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3', 'ext': 'mp4', - 'title': 'Monster', - 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', - 'timestamp': 1508175329, - 'upload_date': '20171016', + 'title': 'You Debt Your Life', + 'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499', + 'duration': 1231.0, + 'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 6', + 'season_number': 6, + 'episode': 'Episode 12', + 'episode_number': 12, + 'timestamp': 1478276239, + 'upload_date': '20161104', }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval', + 'info_dict': { + 'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8', + 'ext': 'mp4', + 'title': 'And Going Medieval', + 'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa', + 'duration': 2528.0, + 'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)', + 'chapters': 'count:7', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 10', + 'episode_number': 10, + 'timestamp': 1743107520, + 'upload_date': '20250327', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out', + 'info_dict': { + 'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39', + 'ext': 'mp4', + 'title': 'Got the Bug Out', + 'description': 'md5:9eeddf6248f73517b0e5969b8a43c025', + 'duration': 1283.0, + 'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1570040829, + 'upload_date': '20191002', + '_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 
'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'only_matching': True, }, { 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'only_matching': True, }, { 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog', + 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/watchtrutv/east', + 'only_matching': True, + }, { + 'url': 'https://www.tbs.com/watchtbs/east', + 'only_matching': True, + }, { + 'url': 'https://www.tntdrama.com/watchtnt/east', + 'only_matching': True, }] - _SOFTWARE_STATEMENT_MAP = { - 'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg', - 'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA', - } def _real_extract(self, url): - site, path, display_id = self._match_valid_url(url).groups() + site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch') + is_live = bool(watch) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r']+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})', - webpage, 'drupal setting'), display_id) - is_live = 'watchtnt' in path or 'watchtbs' in path + drupal_settings = self._search_json( + r']+\bdata-drupal-selector="drupal-settings-json"[^>]*>', + webpage, 'drupal settings', display_id) video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) media_id = video_data['mediaID'] @@ -51,10 +116,14 @@ class TBSIE(TurnerBaseIE): tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( drupal_settings['ngtv_token_url']).query) + auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {} + site_name = auth_info.get('siteName') or self._SITE_INFO[site][0] + software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1] + info = self._extract_ngtv_info( - media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], { + media_id, tokenizer_query, software_statement, { 'url': url, - 'site_name': site[:3].upper(), + 'site_name': site_name, 'auth_required': video_data.get('authRequired') == '1' or is_live, 'is_live': is_live, }) @@ -87,4 +156,6 @@ class TBSIE(TurnerBaseIE): 'thumbnails': thumbnails, 'is_live': is_live, }) + if site == 'trutv': + info['_old_archive_ids'] = [make_archive_id(site, media_id)] return info diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py deleted file mode 100644 index c1d0cb0d14..0000000000 --- 
a/yt_dlp/extractor/trutv.py +++ /dev/null @@ -1,71 +0,0 @@ -from .turner import TurnerBaseIE -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P[0-9A-Za-z-]+)/(?:videos/(?P[0-9A-Za-z-]+)|(?P\d+))' - _TEST = { - 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', - 'info_dict': { - 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', - 'ext': 'mp4', - 'title': 'Sunlight-Activated Flower', - 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q' - - def _real_extract(self, url): - series_slug, clip_slug, video_id = self._match_valid_url(url).groups() - - if video_id: - path = 'episode' - display_id = video_id - else: - path = 'series/clip' - display_id = clip_slug - - data = self._download_json( - f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}', - display_id) - video_data = data['episode'] if video_id else data['info'] - media_id = video_data['mediaId'] - title = video_data['title'].strip() - - info = self._extract_ngtv_info( - media_id, {}, self._SOFTWARE_STATEMENT, { - 'url': url, - 'site_name': 'truTV', - 'auth_required': video_data.get('isAuthRequired'), - }) - - thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('srcUrl') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - info.update({ - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(video_data.get('publicationDate')), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('seasonNum')), - 'episode_number': int_or_none(video_data.get('episodeNum')), - }) - return info diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 4493705e99..a1a7fd6906 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -251,6 +251,11 @@ class TurnerBaseIE(AdobePassIE): 'end_time': start_time + chapter_duration, }) + if is_live: + for f in formats: + # Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403 + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']} + return { 'formats': formats, 'chapters': chapters, From 485de69dbfeb7de7bcf9f7fe16d6c6ba9e81e1a0 Mon Sep 17 00:00:00 2001 From: Barry van Oudtshoorn Date: Fri, 25 Jul 2025 12:00:31 +0800 Subject: [PATCH 56/81] [ie/Parlview] Rework extractor (#13788) Closes #13787 Authored by: barryvan --- yt_dlp/extractor/parlview.py | 80 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py index b93b5edacd..9c7efc58f4 100644 --- 
a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,63 +1,63 @@ +import re + from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get, - unified_timestamp, -) +from ..utils import parse_duration, parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj class ParlviewIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P\d{6})' + _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P[^/?#]+)' _TESTS = [{ - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614', 'info_dict': { - 'id': '542661', + 'id': '3406614', 'ext': 'mp4', - 'title': "Australia's Family Law System [Part 2]", - 'duration': 5799, - 'description': 'md5:7099883b391619dbae435891ca871a62', - 'timestamp': 1621430700, - 'upload_date': '20210519', - 'uploader': 'Joint Committee', + 'title': 'Senate Chamber', + 'description': 'Official Recording of Senate Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg', + 'upload_date': '20250325', + 'duration': 17999, + 'timestamp': 1742939400, }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', - 'only_matching': True, + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv', + 'info_dict': { + 'id': 'SV1394.dv', + 'ext': 'mp4', + 'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]', + 'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg', + 'upload_date': '19960822', + 'duration': 14765, + 'timestamp': 840754200, + }, + 'params': { + 'skip_download': True, + }, }] - _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json' - _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - media = self._download_json(self._API_URL % video_id, video_id).get('media') - timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/' + video_details = self._download_json( + f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails'] - stream = try_get(media, lambda x: x['renditions'][0], dict) - if not stream: - self.raise_no_formats('No streams were detected') - elif stream.get('streamType') != 'VOD': - self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType')))) - formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native') + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_details['files']['file']['url'], video_id, 'mp4') - media_info = self._download_webpage( - self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False) + DURATION_RE = re.compile(r'(?P\d+:\d+:\d+):\d+') return { 'id': video_id, - 'url': url, - 'title': self._html_search_regex(r'
([^<]+)<', webpage, 'title', fatal=False), 'formats': formats, - 'duration': int_or_none(media.get('duration')), - 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), - 'description': self._html_search_regex( - r']+class="descripti?on"[^>]*>[^>]+[^>]+>[^>]+>([^<]+)', - webpage, 'description', fatal=False), - 'uploader': self._html_search_regex( - r'[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), - 'thumbnail': media.get('staticImage'), + 'subtitles': subtitles, + **traverse_obj(video_details, { + 'title': (('parlViewTitle', 'title'), {str}, any), + 'description': ('parlViewDescription', {str}), + 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}), + 'timestamp': ('recordingFrom', {parse_iso8601}), + 'thumbnail': ('thumbUrl', {url_or_none}), + }), } From 4385480795acda35667be008d0bf26b46e9d65b4 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sat, 26 Jul 2025 03:41:21 +0900 Subject: [PATCH 57/81] [utils] `parse_resolution`: Support width-only pattern (#13802) Authored by: doe1080 --- test/test_utils.py | 1 + yt_dlp/utils/_utils.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index aedb565ec1..44747efda6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1373,6 +1373,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 1cb62712ba..a5471da4df 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False): if mobj: return {'height': int(mobj.group(1)) * 540} + if lenient: + mobj = re.search(r'(? 
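As a quick, self-contained sketch of the width-only parsing exercised by the new test above (assuming a yt-dlp checkout that already includes this change; the new pattern is only consulted when lenient=True is passed):

    from yt_dlp.utils import parse_resolution

    parse_resolution('1920w', lenient=True)   # {'width': 1920}, per the new test case
    parse_resolution('1920w')                 # {} -- the width-only pattern is gated behind lenient=True
    parse_resolution('pre_1920x1080_post')    # {'width': 1920, 'height': 1080}, unchanged behaviour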
Date: Fri, 25 Jul 2025 20:55:41 +0200 Subject: [PATCH 58/81] [ie/PlyrEmbed] Add extractor (#13836) Closes #13827 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/plyr.py | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 yt_dlp/extractor/plyr.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1aa2927f8f..a2042557d7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1568,6 +1568,7 @@ from .pluralsight import ( ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, diff --git a/yt_dlp/extractor/plyr.py b/yt_dlp/extractor/plyr.py new file mode 100644 index 0000000000..c5f27cfd95 --- /dev/null +++ b/yt_dlp/extractor/plyr.py @@ -0,0 +1,104 @@ +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE + + +class PlyrEmbedIE(InfoExtractor): + _VALID_URL = False + _WEBPAGE_TESTS = [{ + # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1" + 'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/', + 'info_dict': { + 'id': '522319456', + 'ext': 'mp4', + 'title': '200.000.000 Mouths (1950–51)', + 'uploader': 'Zeughauskino', + 'uploader_url': '', + 'comment_count': int, + 'like_count': int, + 'duration': 963, + 'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d', + 'timestamp': 1615467405, + 'upload_date': '20210311', + 'release_timestamp': 1615467405, + 'release_date': '20210311', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-provider="vimeo" data-plyr-embed-id="803435276" + 'url': 'https://www.inarcassa.it/', + 'info_dict': { + 'id': '803435276', + 'ext': 'mp4', + 'title': 'HOME_Moto_Perpetuo', + 'uploader': 'Inarcassa', + 'uploader_url': '', + 'duration': 38, + 'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI" + 'url': 'https://www.profile.nl', + 'info_dict': { + 'id': 'GF-BjYKoAqI', + 'ext': 'mp4', + 'title': 'PROFILE: Recruitment Profile', + 'description': '', + 'media_type': 'video', + 'uploader': 'Profile Nederland', + 'uploader_id': '@profilenederland', + 'uploader_url': 'https://www.youtube.com/@profilenederland', + 'channel': 'Profile Nederland', + 'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'age_limit': 0, + 'duration': 39, + 'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg', + 'categories': ['Autos & Vehicles'], + 'tags': [], + 'timestamp': 1675692990, + 'upload_date': '20230206', + 'playable_in_embed': True, + 'availability': 'public', + 'live_status': 'not_live', + }, + }, { + # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube" + 'url': 'https://www.vnis.edu.vn', + 'info_dict': { + 'id': 'vnis.edu', + 'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ', + 'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e', + 'age_limit': 0, + 'thumbnail': 
'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png', + 'timestamp': 1753233356, + 'upload_date': '20250723', + }, + 'playlist_count': 3, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + plyr_embeds = re.finditer(r'''(?x) + ]+(?: + data-plyr-embed-id="(?P[^"]+)"[^>]+data-plyr-provider="(?P[^"]+)"| + data-plyr-provider="(?P[^"]+)"[^>]+data-plyr-embed-id="(?P[^"]+)" + )[^>]*>''', webpage) + for mobj in plyr_embeds: + embed_id = mobj.group('id1') or mobj.group('id2') + provider = mobj.group('provider1') or mobj.group('provider2') + if provider == 'vimeo': + if not re.match(r'https?://', embed_id): + embed_id = f'https://player.vimeo.com/video/{embed_id}' + yield VimeoIE._smuggle_referrer(embed_id, url) + elif provider == 'youtube': + if not re.match(r'https?://', embed_id): + embed_id = f'https://youtube.com/watch?v={embed_id}' + yield embed_id From d399505fdf8292332bdc91d33859a0b0d08104fd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 25 Jul 2025 14:44:39 -0500 Subject: [PATCH 59/81] [fd/external] Work around ffmpeg's `file:` URL handling (#13844) Closes #13781 Authored by: bashonly --- yt_dlp/downloader/external.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ee73ac043e..65ed83991b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -572,7 +572,21 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] + url = fmt['url'] + if self.params.get('enable_file_urls') and url.startswith('file:'): + # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs, + # so only local segments can be read unless we also include 'http,https,tcp,tls' + args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls'] + # ffmpeg incorrectly handles 'file:' URLs by only removing the + # 'file:' prefix and treating the rest as if it's a normal filepath. 
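A rough sketch of what the rewrite described in this comment does to a couple of hypothetical local URLs (the helper name and the windows flag are illustrative only; the real code keys off os.name == 'nt'):

    import re

    def fixup_file_url(url, windows=False):
        # Same substitution as in the hunk, with the platform check inlined for clarity
        return re.sub(r'^file://(?:localhost)?/', 'file:' if windows else 'file:/', url)

    fixup_file_url('file:///tmp/hls/index.m3u8')               # 'file:/tmp/hls/index.m3u8'
    fixup_file_url('file://localhost/tmp/hls/index.m3u8')      # 'file:/tmp/hls/index.m3u8'
    fixup_file_url('file:///C:/hls/index.m3u8', windows=True)  # 'file:C:/hls/index.m3u8'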
+ # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs: + # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:' + # - On *nix, replace 'file://localhost/' with 'file:/' + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781 + # https://trac.ffmpeg.org/ticket/2702 + url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url) + + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] From 3e609b2cedd285739bf82c7af7853735092070a4 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:33:49 +0000 Subject: [PATCH 60/81] [ie/FaulioLive] Add extractor (#13421) Authored by: CasperMcFadden95, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/faulio.py | 92 +++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 yt_dlp/extractor/faulio.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a2042557d7..69389671ed 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -640,6 +640,7 @@ from .fancode import ( FancodeVodIE, ) from .fathom import FathomIE +from .faulio import FaulioLiveIE from .faz import FazIE from .fc2 import ( FC2IE, diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py new file mode 100644 index 0000000000..393023503e --- /dev/null +++ b/yt_dlp/extractor/faulio.py @@ -0,0 +1,92 @@ +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import js_to_json, url_or_none +from ..utils.traversal import traverse_obj + + +class FaulioLiveIE(InfoExtractor): + _DOMAINS = ( + 'aloula.sba.sa', + 'maraya.sba.net.ae', + 'sat7plus.org', + ) + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P[a-zA-Z0-9-]+)' + _TESTS = [{ + 'url': 'https://aloula.sba.sa/live/saudiatv', + 'info_dict': { + 'id': 'aloula.faulio.com_saudiatv', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://maraya.sba.net.ae/live/1', + 'info_dict': { + 'id': 'maraya.faulio.com_1', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://sat7plus.org/live/pars', + 'info_dict': { + 'id': 'sat7.faulio.com_pars', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://sat7plus.org/fa/live/arabic', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + config_data = self._search_json( + r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) + api_base = config_data['public']['TRANSLATIONS_API_URL'] + + channel = traverse_obj( + self._download_json(f'{api_base}/channels', video_id), + (lambda k, v: v['url'] == video_id, any)) + + formats = [] + subtitles = {} + if hls_url := traverse_obj(channel, ('streams', 'hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, 
target=subtitles) + + if mpd_url := traverse_obj(channel, ('streams', 'mpd', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, video_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', + **traverse_obj(channel, { + 'title': ('title', {str}), + 'description': ('description', {str}), + }), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } From 30302df22b7b431ce920e0f7298cd10be9989967 Mon Sep 17 00:00:00 2001 From: InvalidUsernameException Date: Sat, 26 Jul 2025 00:22:32 +0200 Subject: [PATCH 61/81] [ie/sportdeuschland] Support embedded player URLs (#13833) Closes #13766 Authored by: InvalidUsernameException --- yt_dlp/extractor/sportdeutschland.py | 118 +++++++++++++-------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 8349d96045..0b7d90a071 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -8,70 +8,86 @@ from ..utils import ( class SportDeutschlandIE(InfoExtractor): - _VALID_URL = r'https?://sportdeutschland\.tv/(?P(?:[^/]+/)?[^?#/&]+)' + _VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P(?:[^/?#]+/)?[^?#/&]+)' _TESTS = [{ - 'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga', + # Single-part video, direct link + 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'md5': '35c11a19395c938cdd076b93bda54cde', 'info_dict': { - 'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54', + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', 'ext': 'mp4', - 'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga', - 'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga', - 'description': 'md5:a288c794a5ee69e200d8f12982f81a87', + 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', + 'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', 'live_status': 'was_live', - 'channel': 'Blau-Weiss Buchholz Tanzsport', - 'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport', - 'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3', - 'duration': 32447, - 'upload_date': '20230114', - 'timestamp': 1673733618, + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + 'duration': 12287.0, }, - 'skip': 'not found', }, { - 'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', + # Single-part video, embedded player link + 'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc', 'info_dict': { - 'id': '95c80c52-6b9a-4ae9-9197-984145adfced', + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', 'ext': 'mp4', - 'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022', - 'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', - 'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e', + 'title': 'GFL2: Rostock Griffins vs. 
Elmshorn Fighting Pirates', + 'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', 'live_status': 'was_live', - 'channel': 'Deutscher Badminton Verband', - 'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband', - 'channel_id': '93ca5866-2551-49fc-8424-6db35af58920', - 'duration': 41097, - 'upload_date': '20220309', - 'timestamp': 1646860727.0, + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + 'duration': 12287.0, }, - 'skip': 'not found', + 'params': {'skip_download': True}, }, { - 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', + # Multi-part video + 'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', 'info_dict': { - 'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce', - 'title': 'Formationswochenende Latein 2023 - Samstag', - 'display_id': 'ggcbremen/formationswochenende-latein-2023', - 'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2', + 'id': '9f63d737-2444-4e3a-a1ea-840df73fd481', + 'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2', + 'description': 'md5:0a17da15e48a687e6019639c3452572b', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', 'live_status': 'was_live', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', }, - 'playlist_count': 3, + 'playlist_count': 2, 'playlist': [{ 'info_dict': { - 'id': '988e1fea-9d44-4fab-8c72-3085fb667547', + 'id': '9f725a94-d43e-40ff-859d-13da3081bb04', 'ext': 'mp4', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'duration': 86, - 'title': 'Formationswochenende Latein 2023 - Samstag Part 1', - 'upload_date': '20230225', - 'timestamp': 1677349909, + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753085197, + 'upload_date': '20250721', + 'live_status': 'was_live', + }, + }, { + 'info_dict': { + 'id': '9f725a94-370e-4477-89ac-1751098e3217', + 'ext': 'mp4', + 'title': 'Volleyball w: Japan vs. 
Braslien - Halbfinale 2 Part 2', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753128421, + 'upload_date': '20250721', 'live_status': 'was_live', }, }], - 'skip': 'not found', }, { + # Livestream 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', 'info_dict': { 'id': '95d71b8a-370a-4b87-ad16-94680da18528', @@ -85,22 +101,6 @@ class SportDeutschlandIE(InfoExtractor): 'live_status': 'is_live', }, 'skip': 'live', - }, { - 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', - 'md5': '35c11a19395c938cdd076b93bda54cde', - 'info_dict': { - 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', - 'ext': 'mp4', - 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', - 'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', - 'channel': 'Rostock Griffins', - 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', - 'live_status': 'was_live', - 'description': 'md5:60cb00067e55dafa27b0933a43d72862', - 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', - 'timestamp': 1749913117, - 'upload_date': '20250614', - }, }] def _process_video(self, asset_id, video): From 1fe83b0111277a6f214c5ec1819cfbf943508baf Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 27 Jul 2025 00:34:22 +0900 Subject: [PATCH 62/81] [ie/eagleplatform] Remove extractors (#13469) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 4 - yt_dlp/extractor/eagleplatform.py | 215 ------------------------------ yt_dlp/extractor/generic.py | 32 ----- yt_dlp/extractor/livejournal.py | 1 + 4 files changed, 1 insertion(+), 251 deletions(-) delete mode 100644 yt_dlp/extractor/eagleplatform.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 69389671ed..617c2c5ce0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -571,10 +571,6 @@ from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c0590..0000000000 --- a/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, 
- 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - ]+ - src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - ]+ - class=(?P["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') - 
m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': f'http-{format_id}', - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } - - -class ClipYouEmbedIE(InfoExtractor): - _VALID_URL = False - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search( - r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) - if mobj is not None: - yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d9a666f991..652c3b851b 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1010,38 +1010,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # referrer protected EaglePlatform embed - { - 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', - 'info_dict': { - 'id': '582306', - 'ext': 'mp4', - 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 3382, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, - # ClipYou (EaglePlatform) embed (custom URL) - { - 'url': 'http://muz-tv.ru/play/7129/', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This video is unavailable.', - }, # Pladform embed { 'url': 'http://muz-tv.ru/kinozal/view/7400/', diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py index c61f9bec7a..ee2dfca0d0 100644 --- a/yt_dlp/extractor/livejournal.py +++ b/yt_dlp/extractor/livejournal.py @@ -3,6 +3,7 @@ from ..utils import int_or_none class LiveJournalIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' _TEST = { 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', From e8c2bf798b6707d27fecde66161172da69c7cd72 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:02:56 -0400 Subject: [PATCH 63/81] [ie/neteasemusic] Support XFF (#11044) Closes #11043 Authored by: c-basalt --- yt_dlp/extractor/neteasemusic.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 900b8b2a30..6c47086b9b 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor): 'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac ) _API_BASE = 'http://music.163.com/api/' - _GEO_BYPASS = False def _create_eapi_cipher(self, api_path, query_body, cookies): request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) @@ -64,6 +63,8 @@ class NetEaseMusicBaseIE(InfoExtractor): 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), }), } + if self._x_forwarded_for_ip: + headers.setdefault('X-Real-IP', self._x_forwarded_for_ip) return self._download_json( urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ From daa1859be1b0e7d123da8b4e0988f2eb7bd47d15 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:11:57 +0000 Subject: [PATCH 64/81] [ie/FaulioLive] Support Bahry TV (#13850) Authored by: CasperMcFadden95 --- yt_dlp/extractor/faulio.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index 393023503e..a5d5c750b4 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -9,6 +9,7 @@ from ..utils.traversal import traverse_obj class FaulioLiveIE(InfoExtractor): _DOMAINS = ( 'aloula.sba.sa', + 'bahry.com', 'maraya.sba.net.ae', 'sat7plus.org', ) @@ -25,6 +26,18 @@ class FaulioLiveIE(InfoExtractor): 'params': { 'skip_download': 'Livestream', }, + }, { + 'url': 'https://bahry.com/live/1', + 'info_dict': { + 'id': 'bahry.faulio.com_1', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, }, { 'url': 'https://maraya.sba.net.ae/live/1', 'info_dict': { From 57186f958f164daa50203adcbf7ec74d541151cf Mon Sep 17 00:00:00 2001 From: Tom Hebb Date: Sat, 26 Jul 2025 14:43:38 -0400 Subject: [PATCH 65/81] [fd/hls] Fix `--hls-split-continuity` support (#13321) Authored by: tchebb --- yt_dlp/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2256305785..58cfbbf163 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -205,7 +205,7 @@ class HlsFD(FragmentFD): line = line.strip() if line: if not line.startswith('#'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if ad_frag_next: continue @@ -231,7 +231,7 @@ class HlsFD(FragmentFD): byte_range = {} elif line.startswith('#EXT-X-MAP'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( From 66aa21dc5a3b79059c38f3ad1d05dc9b29187701 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:39:54 -0500 Subject: [PATCH 66/81] [build] Use `macos-14` runner for `macos` builds (#13814) Ref: https://github.blog/changelog/2025-07-11-upcoming-changes-to-macos-hosted-runners-macos-latest-migration-and-xcode-support-policy-updates/#macos-13-is-closing-down Authored by: bashonly --- .github/workflows/build.yml | 4 +++- bundle/pyinstaller.py | 12 +++++++++--- 2 files 
changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e2411ecfad..b3db8fec1b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -242,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-13 + runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -261,6 +261,8 @@ jobs: - name: Install Requirements run: | brew install coreutils + # We need to use system Python in order to roll our own universal2 curl_cffi wheel + brew uninstall --ignore-dependencies python3 python3 -m venv ~/yt-dlp-build-venv source ~/yt-dlp-build-venv/bin/activate python3 devscripts/install_deps.py -o --include build diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index c2f6511210..0597f602d0 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -62,16 +62,22 @@ def parse_options(): def exe(onedir): """@returns (name, path)""" + platform_name, machine, extension = { + 'win32': (None, MACHINE, '.exe'), + 'darwin': ('macos', None, None), + }.get(OS_NAME, (OS_NAME, MACHINE, None)) + name = '_'.join(filter(None, ( 'yt-dlp', - {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE, + platform_name, + machine, ))) + return name, ''.join(filter(None, ( 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe', + extension, ))) From cc5a5caac5fbc0d605b52bde0778d6fd5f97b5ab Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:12:53 -0500 Subject: [PATCH 67/81] Deprecate `darwin_legacy_exe` support (#13857) Ref: https://github.com/yt-dlp/yt-dlp/issues/13856 Authored by: bashonly --- yt_dlp/update.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index de289cb780..f85be2d08f 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -141,6 +141,17 @@ def _get_binary_name(): def _get_system_deprecation(): MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) + EXE_MSG_TMPL = ('Support for {} has been deprecated. ' + 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}') + STOP_MSG = 'You may stop receiving updates on this version at any time!' + variant = detect_variant() + + # Temporary until macos_legacy executable builds are discontinued + if variant == 'darwin_legacy_exe': + return EXE_MSG_TMPL.format( + f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)', + 'issues/13856', STOP_MSG) + if sys.version_info > MIN_RECOMMENDED: return None From 23c658b9cbe34a151f8f921ab1320bb5d4e40a4d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:59:02 -0500 Subject: [PATCH 68/81] Raise minimum recommended Python version to 3.10 (#13859) Ref: https://github.com/yt-dlp/yt-dlp/issues/13858 Authored by: bashonly --- yt_dlp/update.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index f85be2d08f..30cbf538e9 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -139,7 +139,7 @@ def _get_binary_name(): def _get_system_deprecation(): - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) + MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10) EXE_MSG_TMPL = ('Support for {} has been deprecated. ' 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}') @@ -161,6 +161,13 @@ def _get_system_deprecation(): if sys.version_info < MIN_SUPPORTED: return f'Python version {major}.{minor} is no longer supported! 
{PYTHON_MSG}' + # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 22.04 and Python 3.10 + if variant in ('linux_aarch64_exe', 'linux_armv7l_exe'): + libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2]) + if libc_ver < (2, 35): + return EXE_MSG_TMPL.format('system glibc version < 2.35', 'issues/13858', STOP_MSG) + return None + return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' From b831406a1d3be34c159835079d12bae624c43610 Mon Sep 17 00:00:00 2001 From: Florentin Le Moal Date: Sun, 27 Jul 2025 21:52:05 +0200 Subject: [PATCH 69/81] [ie/rtve.es:program] Add extractor Authored by: meGAmeS1, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/rtve.py | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 617c2c5ce0..9445270858 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1781,6 +1781,7 @@ from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, + RTVEProgramIE, RTVETelevisionIE, ) from .rtvs import RTVSIE diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 2812d93059..c2ccf73ddc 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -6,9 +6,11 @@ import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, + InAdvancePagedList, clean_html, determine_ext, float_or_none, + int_or_none, make_archive_id, parse_iso8601, qualities, @@ -371,3 +373,62 @@ class RTVETelevisionIE(InfoExtractor): raise ExtractorError('The webpage doesn\'t contain any video', expected=True) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key()) + + +class RTVEProgramIE(RTVEBaseIE): + IE_NAME = 'rtve.es:program' + IE_DESC = 'RTVE.es programs' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P[\w-]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.rtve.es/play/videos/saber-vivir/', + 'info_dict': { + 'id': '111570', + 'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play', + }, + 'playlist_mincount': 400, + }] + _PAGE_SIZE = 60 + + def _fetch_page(self, program_id, page_num): + return self._download_json( + f'https://www.rtve.es/api/programas/{program_id}/videos', + program_id, note=f'Downloading page {page_num}', + query={ + 'type': 39816, + 'page': page_num, + 'size': 60, + }) + + def _entries(self, page_data): + for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))): + yield self.url_result( + video['htmlUrl'], RTVEALaCartaIE, url_transparent=True, + **traverse_obj(video, { + 'id': ('id', {str}), + 'title': ('longTitle', {str}), + 'description': ('shortDescription', {str}), + 'duration': ('duration', {float_or_none(scale=1000)}), + 'series': (('programInfo', 'title'), {str}, any), + 'season_number': ('temporadaOrden', {int_or_none}), + 'season_id': ('temporadaId', {str}), + 'season': ('temporada', {str}), + 'episode_number': ('episode', {int_or_none}), + 'episode': ('title', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + }), + ) + + def _real_extract(self, url): + program_slug = self._match_id(url) + program_page = self._download_webpage(url, program_slug) + + program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True) + + first_page = self._fetch_page(program_id, 1) + page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1 + + entries = InAdvancePagedList( + lambda idx: 
self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page), + page_count, self._PAGE_SIZE) + + return self.playlist_result(entries, program_id, self._html_extract_title(program_page)) From 682334e4b35112f7a5798decdcb5cb12230ef948 Mon Sep 17 00:00:00 2001 From: fries1234 Date: Sun, 27 Jul 2025 13:26:33 -0700 Subject: [PATCH 70/81] [ie/tvw:news] Add extractor (#12907) Authored by: fries1234 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/tvw.py | 56 +++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9445270858..3eea0cdf6b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2235,6 +2235,7 @@ from .tvplay import ( from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE diff --git a/yt_dlp/extractor/tvw.py b/yt_dlp/extractor/tvw.py index 0ab926dbdd..74d9b6424b 100644 --- a/yt_dlp/extractor/tvw.py +++ b/yt_dlp/extractor/tvw.py @@ -10,12 +10,15 @@ from ..utils import ( unified_timestamp, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class TvwIE(InfoExtractor): IE_NAME = 'tvw' - _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)' + _VALID_URL = [ + r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)', + r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P\d+)', + ] _TESTS = [{ 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'md5': '9ceb94fe2bb7fd726f74f16356825703', @@ -75,6 +78,20 @@ class TvwIE(InfoExtractor): 'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'categories': ['Washington to Washington', 'General Interest'], }, + }, { + 'url': 'https://tvw.org/watch?eventID=2025041235', + 'md5': '7d697c02f110b37d6a47622ea608ca90', + 'info_dict': { + 'id': '2025041235', + 'ext': 'mp4', + 'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration', + 'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$', + 'description': 'md5:37d0f3a9187ae520aac261b3959eaee6', + 'timestamp': 1745006400, + 'upload_date': '20250418', + 'location': 'Hayner Media Center', + 'categories': ['Legislative Review'], + }, }] def _real_extract(self, url): @@ -125,6 +142,41 @@ class TvwIE(InfoExtractor): } +class TvwNewsIE(InfoExtractor): + IE_NAME = 'tvw:news' + _VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/', + 'info_dict': { + 'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session', + 'title': 'The Impact - Issues to Watch in the 2024 Legislative Session', + 'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441', + }, + 'playlist_count': 6, + }, { + 'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/', + 'info_dict': { + 'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate', + 'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate', + 'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + video_ids = traverse_obj(webpage, ( + {find_elements(cls='invintus-player', html=True)}, ..., 
{extract_attributes}, 'data-eventid')) + + return self.playlist_from_matches( + video_ids, playlist_id, + playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'), + playlist_description=self._og_search_description(webpage, default=None), + getter=lambda x: f'https://tvw.org/watch?eventID={x}', ie=TvwIE) + + class TvwTvChannelsIE(InfoExtractor): IE_NAME = 'tvw:tvchannels' _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P[^/?#]+)' From 28b68f687561468e0c664dcb430707458970019f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:47:28 -0500 Subject: [PATCH 71/81] [cookies] Load cookies with float `expires` timestamps (#13873) Authored by: bashonly --- yt_dlp/cookies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 5675445ace..459a4b7de0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1335,7 +1335,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): if len(cookie_list) != self._ENTRY_LEN: raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): + if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at): raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line From 62e2a9c0d55306906f18da2927e05e1cbc31473c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:31:35 -0500 Subject: [PATCH 72/81] [ci] Bump supported PyPy version to 3.11 (#13877) Ref: https://pypy.org/posts/2025/07/pypy-v7320-release.html Authored by: bashonly --- .github/workflows/core.yml | 4 ++-- .github/workflows/download.yml | 4 ++-- .github/workflows/signature-tests.yml | 2 +- CONTRIBUTING.md | 2 +- README.md | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index dd2c6f481e..86036989c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -37,7 +37,7 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.9 is in quick-test - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest @@ -49,7 +49,7 @@ jobs: - os: windows-latest python-version: '3.13' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 6849fba9b6..594a664c9c 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.9' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml index 203172e0b9..42c65db353 100644 --- a/.github/workflows/signature-tests.yml +++ b/.github/workflows/signature-tests.yml @@ -25,7 +25,7 @@ 
jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2c58cdfc94..8822907b79 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -272,7 +272,7 @@ After you have ensured this site is distributing its content legally, you can fo You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/README.md b/README.md index e5bd21b9ca..12f68e98d8 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES -Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
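A minimal, self-contained sketch of the interpreter gate these requirements correspond to (the (3, 9)/(3, 10) thresholds come from the update.py change earlier in this series; the messages are paraphrased and the snippet itself is only illustrative):

    import sys

    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10)

    if sys.version_info[:2] < MIN_SUPPORTED:
        print(f'Python {sys.version_info[0]}.{sys.version_info[1]} is no longer supported')
    elif sys.version_info[:2] < MIN_RECOMMENDED:
        print(f'Support for Python {sys.version_info[0]}.{sys.version_info[1]} has been deprecated')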