diff --git a/README.md b/README.md index 6e27b0a348..1ee4223856 100644 --- a/README.md +++ b/README.md @@ -1866,6 +1866,9 @@ The following extractors use this feature: #### sonylivseries * `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc` +#### tver +* `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/pyproject.toml b/pyproject.toml index b5d77d35b7..5e987a6fd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,8 +55,7 @@ default = [ "websockets>=13.0", ] curl-cffi = [ - "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", - "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", + "curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.11; implementation_name=='cpython'", ] secretstorage = [ "cffi", diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 54f35ef552..c6ff6209a8 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -638,6 +638,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'img_bipbop_adv_example_fmp4', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', [{ + # 60kbps (bitrate not provided in m3u8); sorted as worst because it's grouped with lowest bitrate video track 'format_id': 'aud1-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', @@ -645,22 +646,27 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 0, }, { - 'format_id': 'aud2-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', + # 192kbps (bitrate not provided in m3u8) + 'format_id': 'aud3-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 1, }, { - 'format_id': 'aud3-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', + # 384kbps (bitrate not provided in m3u8); sorted as best because it's grouped with the highest bitrate video track + 'format_id': 'aud2-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 2, }, { 'format_id': '530', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', diff --git a/test/test_networking.py b/test/test_networking.py index 63914bc4ba..3ab60fe836 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -614,7 +614,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() assert source_address == data - # Not supported by CurlCFFI @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi') def test_gzip_trailing_garbage(self, handler): with handler() as rh: diff --git a/test/test_subtitles.py b/test/test_subtitles.py index f3b0056179..efd69b33d9 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -23,7 +23,6 @@ from yt_dlp.extractor import ( TedTalkIE, ThePlatformFeedIE, ThePlatformIE, - VikiIE, VimeoIE, WallaIE, YoutubeIE, @@ -331,20 +330,6 @@ class TestRaiPlaySubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') -@is_download_test -@unittest.skip('IE broken - DRM only') -class TestVikiSubtitles(BaseTestSubtitles): - url = 'http://www.viki.com/videos/1060846v-punch-episode-18' - IE = VikiIE - - def test_allsubtitles(self): - self.DL.params['writesubtitles'] = True - self.DL.params['allsubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), {'en'}) - self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') - - @is_download_test class TestThePlatformSubtitles(BaseTestSubtitles): # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ diff --git a/test/test_utils.py b/test/test_utils.py index 42dc7f937e..e60ceed8fd 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1260,6 +1260,7 @@ class TestUtil(unittest.TestCase): def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') + self.assertEqual(js_to_json('{a: `${e("")}`}'), '{"a": "\\"e\\"(\\"\\")"}') def test_js_to_json_template_literal(self): self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 45dc9113bd..453caacd65 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -83,6 +83,11 @@ _SIG_TESTS = [ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1', ), + ( + 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + ), ] _NSIG_TESTS = [ @@ -234,6 +239,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', 'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA', ), + ( + 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', + 'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg', + ), ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8790b326b7..63e6e11b26 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -654,19 +654,21 @@ class YoutubeDL: if not all_plugins_loaded.value: load_all_plugins() - try: - windows_enable_vt_mode() - except Exception as e: - self.write_debug(f'Failed to enable VT mode: {e}') - stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, - console=next(filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + + # hehe "immutable" namespace + self._out_files.console = next(filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + if self.params.get('no_color'): if self.params.get('color') is not None: self.params.setdefault('_warnings', []).append( @@ -4150,7 +4152,7 @@ class YoutubeDL: (target, rh.RH_NAME) for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler) - for target in rh.supported_targets + for target in reversed(rh.supported_targets) ] def _impersonate_target_available(self, target): diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 7d8f100474..714d9ad5c2 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1021,8 +1021,9 @@ def _real_main(argv=None): # List of simplified targets we know are supported, # to help users know what dependencies may be required. (ImpersonateTarget('chrome'), 'curl_cffi'), - (ImpersonateTarget('edge'), 'curl_cffi'), (ImpersonateTarget('safari'), 'curl_cffi'), + (ImpersonateTarget('firefox'), 'curl_cffi>=0.10'), + (ImpersonateTarget('edge'), 'curl_cffi'), ] available_targets = ydl._get_available_impersonate_targets() @@ -1038,12 +1039,12 @@ def _real_main(argv=None): for known_target, known_handler in known_targets: if not any( - known_target in target and handler == known_handler + known_target in target and known_handler.startswith(handler) for target, handler in available_targets ): - rows.append([ + rows.insert(0, [ ydl._format_out(text, ydl.Styles.SUPPRESS) - for text in make_row(known_target, f'{known_handler} (not available)') + for text in make_row(known_target, f'{known_handler} (unavailable)') ]) ydl.to_screen('[info] Available impersonate targets') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 38e9673cd4..a44601b14d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -496,10 +496,6 @@ from .daum import ( from .daystar import DaystarClipIE from .dbtv import DBTVIE from .dctp import DctpTvIE -from .deezer import ( - DeezerAlbumIE, - DeezerPlaylistIE, -) from .democracynow import DemocracynowIE from .detik import DetikEmbedIE from .deuxm import ( @@ -843,6 +839,7 @@ from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveClipIE, IchinanaLiveIE, + IchinanaLiveVODIE, ) from .idolplus import IdolPlusIE from .ign import ( @@ -1989,6 +1986,7 @@ from .storyfire import ( StoryFireSeriesIE, StoryFireUserIE, ) +from .streaks import StreaksIE from .streamable import StreamableIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE @@ -2352,10 +2350,6 @@ from .viewlift import ( ViewLiftIE, ) from .viidea import ViideaIE -from .viki import ( - VikiChannelIE, - VikiIE, -) from .vimeo import ( VHXEmbedIE, VimeoAlbumIE, @@ -2400,6 +2394,12 @@ from .voxmedia import ( VoxMediaIE, VoxMediaVolumeIE, ) +from .vrsquare import ( + VrSquareChannelIE, + VrSquareIE, + VrSquareSearchIE, + VrSquareSectionIE, +) from .vrt import ( VRTIE, DagelijkseKostIE, diff --git a/yt_dlp/extractor/chzzk.py b/yt_dlp/extractor/chzzk.py index aec77ac454..a5daf5ca7c 100644 --- a/yt_dlp/extractor/chzzk.py +++ b/yt_dlp/extractor/chzzk.py @@ -21,7 +21,7 @@ class CHZZKLiveIE(InfoExtractor): 'channel': '진짜도현', 'channel_id': 'c68b8ef525fb3d2fa146344d84991753', 'channel_is_verified': False, - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'timestamp': 1705510344, 'upload_date': '20240117', 'live_status': 'is_live', @@ -98,7 +98,7 @@ class CHZZKVideoIE(InfoExtractor): 'channel': '침착맨', 'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c', 'channel_is_verified': False, - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'duration': 15577, 'timestamp': 1702970505.417, 'upload_date': '20231219', @@ -115,7 +115,7 @@ class CHZZKVideoIE(InfoExtractor): 'channel': '라디유radiyu', 'channel_id': '68f895c59a1043bc5019b5e08c83a5c5', 'channel_is_verified': False, - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'duration': 95, 'timestamp': 1703102631.722, 'upload_date': '20231220', @@ -131,12 +131,30 @@ class CHZZKVideoIE(InfoExtractor): 'channel': '강지', 'channel_id': 'b5ed5db484d04faf4d150aedd362f34b', 'channel_is_verified': True, - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'duration': 4433, 'timestamp': 1703307460.214, 'upload_date': '20231223', 'view_count': int, }, + }, { + # video_status == 'NONE' but is downloadable + 'url': 'https://chzzk.naver.com/video/6325166', + 'info_dict': { + 'id': '6325166', + 'ext': 'mp4', + 'title': '와이프 숙제빼주기', + 'channel': '이 다', + 'channel_id': '0076a519f147ee9fd0959bf02f9571ca', + 'channel_is_verified': False, + 'view_count': int, + 'duration': 28167, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1742139216.86, + 'upload_date': '20250316', + 'live_status': 'was_live', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): @@ -147,11 +165,7 @@ class CHZZKVideoIE(InfoExtractor): live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live' video_status = video_meta.get('vodStatus') - if video_status == 'UPLOAD': - playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls') - elif video_status == 'ABR_HLS': + if video_status == 'ABR_HLS': formats, subtitles = self._extract_mpd_formats_and_subtitles( f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id, query={ @@ -161,10 +175,17 @@ class CHZZKVideoIE(InfoExtractor): 'cpl': 'en_US', }) else: - self.raise_no_formats( - f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id) - formats, subtitles = [], {} - live_status = 'post_live' if live_status == 'was_live' else None + fatal = video_status == 'UPLOAD' + playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id, fatal=fatal) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + traverse_obj(playback, ('media', 0, 'path')), video_id, 'mp4', m3u8_id='hls', fatal=fatal) + if formats and video_status != 'UPLOAD': + self.write_debug(f'Video found with status: "{video_status}"') + elif not formats: + self.raise_no_formats( + f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id) + formats, subtitles = [], {} + live_status = 'post_live' if live_status == 'was_live' else None return { 'id': video_id, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0119111816..4c1bc4cf47 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -78,6 +78,7 @@ from ..utils import ( parse_iso8601, parse_m3u8_attributes, parse_resolution, + qualities, sanitize_url, smuggle_url, str_or_none, @@ -2177,6 +2178,8 @@ class InfoExtractor: media_url = media.get('URI') if media_url: manifest_url = format_url(media_url) + is_audio = media_type == 'AUDIO' + is_alternate = media.get('DEFAULT') == 'NO' or media.get('AUTOSELECT') == 'NO' formats.extend({ 'format_id': join_nonempty(m3u8_id, group_id, name, idx), 'format_note': name, @@ -2189,7 +2192,11 @@ class InfoExtractor: 'preference': preference, 'quality': quality, 'has_drm': has_drm, - 'vcodec': 'none' if media_type == 'AUDIO' else None, + 'vcodec': 'none' if is_audio else None, + # Alternate audio formats (e.g. audio description) should be deprioritized + 'source_preference': -2 if is_audio and is_alternate else None, + # Save this to assign source_preference based on associated video stream + '_audio_group_id': group_id if is_audio and not is_alternate else None, } for idx in _extract_m3u8_playlist_indices(manifest_url)) def build_stream_name(): @@ -2284,6 +2291,8 @@ class InfoExtractor: # ignore references to rendition groups and treat them # as complete formats. if audio_group_id and codecs and f.get('vcodec') != 'none': + # Save this to determine quality of audio formats that only have a GROUP-ID + f['_audio_group_id'] = audio_group_id audio_group = groups.get(audio_group_id) if audio_group and audio_group[0].get('URI'): # TODO: update acodec for audio only formats with @@ -2306,6 +2315,28 @@ class InfoExtractor: formats.append(http_f) last_stream_inf = {} + + # Some audio-only formats only have a GROUP-ID without any other quality/bitrate/codec info + # Each audio GROUP-ID corresponds with one or more video formats' AUDIO attribute + # For sorting purposes, set source_preference based on the quality of the video formats they are grouped with + # See https://github.com/yt-dlp/yt-dlp/issues/11178 + audio_groups_by_quality = orderedSet(f['_audio_group_id'] for f in sorted( + traverse_obj(formats, lambda _, v: v.get('vcodec') != 'none' and v['_audio_group_id']), + key=lambda x: (x.get('tbr') or 0, x.get('width') or 0))) + audio_quality_map = { + audio_groups_by_quality[0]: 'low', + audio_groups_by_quality[-1]: 'high', + } if len(audio_groups_by_quality) > 1 else None + audio_preference = qualities(audio_groups_by_quality) + for fmt in formats: + audio_group_id = fmt.pop('_audio_group_id', None) + if not audio_quality_map or not audio_group_id or fmt.get('vcodec') != 'none': + continue + # Use source_preference since quality and preference are set by params + fmt['source_preference'] = audio_preference(audio_group_id) + fmt['format_note'] = join_nonempty( + fmt.get('format_note'), audio_quality_map.get(audio_group_id), delim=', ') + return formats, subtitles def _extract_m3u8_vod_duration( diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py deleted file mode 100644 index 2ca8be5ca0..0000000000 --- a/yt_dlp/extractor/deezer.py +++ /dev/null @@ -1,142 +0,0 @@ -import json - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - orderedSet, -) - - -class DeezerBaseInfoExtractor(InfoExtractor): - def get_data(self, url): - if not self.get_param('test'): - self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!') - - mobj = self._match_valid_url(url) - data_id = mobj.group('id') - - webpage = self._download_webpage(url, data_id) - geoblocking_msg = self._html_search_regex( - r'
(.*?)
', webpage, 'geoblocking message', - default=None) - if geoblocking_msg is not None: - raise ExtractorError( - f'Deezer said: {geoblocking_msg}', expected=True) - - data_json = self._search_regex( - (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*', - r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'), - webpage, 'data JSON') - data = json.loads(data_json) - return data_id, webpage, data - - -class DeezerPlaylistIE(DeezerBaseInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P