From b9c979461b244713bf42691a5bc02834e2ba4b2c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 24 Mar 2025 16:18:51 -0500 Subject: [PATCH 1/7] [ie/youtube] Fix signature and nsig extraction for player `363db69b` (#12725) Closes #12724 Authored by: bashonly --- test/test_youtube_signature.py | 9 +++++++++ yt_dlp/extractor/youtube/_video.py | 7 +++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 45dc9113bd..453caacd65 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -83,6 +83,11 @@ _SIG_TESTS = [ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1', ), + ( + 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + ), ] _NSIG_TESTS = [ @@ -234,6 +239,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', 'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA', ), + ( + 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', + 'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg', + ), ] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index c773ba2f11..ee93a599a4 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2176,10 +2176,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): """Returns tuple of strings: variable assignment code, variable name, variable value code""" return self._search_regex( r'''(?x) - \'use\s+strict\';\s* + (?P["\'])use\s+strict(?P=q1);\s* (?P var\s+(?P[a-zA-Z0-9_$]+)\s*=\s* - (?P"(?:[^"\\]|\\.)+"\.split\("[^"]+"\)) + (?P + (?P["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2) + \.split\((?P["\'])(?:(?!(?P=q3)).)+(?P=q3)\) + ) )[;,] ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None)) From 4054a2b623bd1e277b49d2e9abc3d112a4b1c7be Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 24 Mar 2025 16:22:25 -0500 Subject: [PATCH 2/7] [ie/youtube] Fix PhantomJS nsig fallback (#12728) Also fixes the NSigDeno plugin Closes #12724 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index ee93a599a4..b8cc72ab1d 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2190,7 +2190,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): global_var, varname, _ = self._extract_player_js_global_var(full_code) if global_var: self.write_debug(f'Prepending n function code with global array variable "{varname}"') - code = global_var + ', ' + code + code = global_var + '; ' + code else: self.write_debug('No global array variable found in player JS') return argnames, re.sub( @@ -2199,7 +2199,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.21') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.24') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) From b7fbb5a0a16a8e8d3e29c29e26ebed677d0d6ea3 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 25 Mar 2025 06:28:09 +0900 Subject: [PATCH 3/7] [ie/vrsquare] Add extractors (#12515) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 6 ++ yt_dlp/extractor/vrsquare.py | 185 ++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 yt_dlp/extractor/vrsquare.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c56ec9df6a..eb914d2eb7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2392,6 +2392,12 @@ from .voxmedia import ( VoxMediaIE, VoxMediaVolumeIE, ) +from .vrsquare import ( + VrSquareChannelIE, + VrSquareIE, + VrSquareSearchIE, + VrSquareSectionIE, +) from .vrt import ( VRTIE, DagelijkseKostIE, diff --git a/yt_dlp/extractor/vrsquare.py b/yt_dlp/extractor/vrsquare.py new file mode 100644 index 0000000000..9e8740b421 --- /dev/null +++ b/yt_dlp/extractor/vrsquare.py @@ -0,0 +1,185 @@ +import itertools + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + parse_duration, + parse_qs, +) +from ..utils.traversal import ( + find_element, + find_elements, + traverse_obj, +) + + +class VrSquareIE(InfoExtractor): + IE_NAME = 'vrsquare' + IE_DESC = 'VR SQUARE' + + _BASE_URL = 'https://livr.jp' + _VALID_URL = r'https?://livr\.jp/contents/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://livr.jp/contents/P470896661', + 'info_dict': { + 'id': 'P470896661', + 'ext': 'mp4', + 'title': 'そこ曲がったら、櫻坂? 7年間お疲れ様!菅井友香の卒業を祝う会!前半 2022年11月6日放送分', + 'description': 'md5:523726dc835aa8014dfe1e2b38d36cd1', + 'duration': 1515.0, + 'tags': 'count:2', + 'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg', + }, + }, { + 'url': 'https://livr.jp/contents/P589523973', + 'info_dict': { + 'id': 'P589523973', + 'ext': 'mp4', + 'title': '薄闇に仰ぐ しだれ桜の妖艶', + 'description': 'md5:a042f517b2cbb4ed6746707afec4d306', + 'duration': 1084.0, + 'tags': list, + 'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg', + }, + 'skip': 'Paid video', + }, { + 'url': 'https://livr.jp/contents/P316939908', + 'info_dict': { + 'id': 'P316939908', + 'ext': 'mp4', + 'title': '2024年5月16日(木) 「今日は誰に恋をする?」公演 小栗有以 生誕祭', + 'description': 'md5:2110bdcf947f28bd7d06ec420e51b619', + 'duration': 8559.0, + 'tags': list, + 'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg', + }, + 'skip': 'Premium channel subscribers only', + }, { + # Accessible only in the VR SQUARE app + 'url': 'https://livr.jp/contents/P126481458', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + status = self._download_json( + f'{self._BASE_URL}/webApi/contentsStatus/{video_id}', + video_id, 'Checking contents status', fatal=False) + if traverse_obj(status, 'result_code') == '40407': + self.raise_login_required('Unable to access this video') + + try: + web_api = self._download_json( + f'{self._BASE_URL}/webApi/play/url/{video_id}', video_id) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 500: + raise ExtractorError('VR SQUARE app-only videos are not supported', expected=True) + raise + + return { + 'id': video_id, + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), + 'description': self._html_search_meta('description', webpage), + 'formats': self._extract_m3u8_formats(traverse_obj(web_api, ( + 'urls', ..., 'url', any)), video_id, 'mp4', fatal=False), + 'thumbnail': self._html_search_meta('og:image', webpage), + **traverse_obj(webpage, { + 'duration': ({find_element(cls='layout-product-data-time')}, {parse_duration}), + 'tags': ({find_elements(cls='search-tag')}, ..., {clean_html}), + }), + } + + +class VrSquarePlaylistBaseIE(InfoExtractor): + _BASE_URL = 'https://livr.jp' + + def _fetch_vids(self, source, keys=()): + for url_path in traverse_obj(source, ( + *keys, {find_elements(cls='video', html=True)}, ..., + {extract_attributes}, 'data-url', {str}, filter), + ): + yield self.url_result( + f'{self._BASE_URL}/contents/{url_path.removeprefix("/contents/")}', VrSquareIE) + + def _entries(self, path, display_id, query=None): + for page in itertools.count(1): + ajax = self._download_json( + f'{self._BASE_URL}{path}', display_id, + f'Downloading playlist JSON page {page}', + query={'p': page, **(query or {})}) + yield from self._fetch_vids(ajax, ('contents_render_list', ...)) + if not traverse_obj(ajax, (('has_next', 'hasNext'), {bool}, any)): + break + + +class VrSquareChannelIE(VrSquarePlaylistBaseIE): + IE_NAME = 'vrsquare:channel' + + _VALID_URL = r'https?://livr\.jp/channel/(?P\w+)' + _TESTS = [{ + 'url': 'https://livr.jp/channel/H372648599', + 'info_dict': { + 'id': 'H372648599', + 'title': 'AKB48+チャンネル', + }, + 'playlist_mincount': 502, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + return self.playlist_result( + self._entries(f'/ajax/channel/{playlist_id}', playlist_id), + playlist_id, self._html_search_meta('og:title', webpage)) + + +class VrSquareSearchIE(VrSquarePlaylistBaseIE): + IE_NAME = 'vrsquare:search' + + _VALID_URL = r'https?://livr\.jp/web-search/?\?(?:[^#]+&)?w=[^#]+' + _TESTS = [{ + 'url': 'https://livr.jp/web-search?w=%23%E5%B0%8F%E6%A0%97%E6%9C%89%E4%BB%A5', + 'info_dict': { + 'id': '#小栗有以', + }, + 'playlist_mincount': 60, + }] + + def _real_extract(self, url): + search_query = parse_qs(url)['w'][0] + + return self.playlist_result( + self._entries('/ajax/web-search', search_query, {'w': search_query}), search_query) + + +class VrSquareSectionIE(VrSquarePlaylistBaseIE): + IE_NAME = 'vrsquare:section' + + _VALID_URL = r'https?://livr\.jp/(?:category|headline)/(?P\w+)' + _TESTS = [{ + 'url': 'https://livr.jp/category/C133936275', + 'info_dict': { + 'id': 'C133936275', + 'title': 'そこ曲がったら、櫻坂?VR', + }, + 'playlist_mincount': 308, + }, { + 'url': 'https://livr.jp/headline/A296449604', + 'info_dict': { + 'id': 'A296449604', + 'title': 'AKB48 アフターVR', + }, + 'playlist_mincount': 22, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + return self.playlist_result( + self._fetch_vids(webpage), playlist_id, self._html_search_meta('og:title', webpage)) From 9491b44032b330e05bd5eaa546187005d1e8538e Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 24 Mar 2025 22:28:47 +0100 Subject: [PATCH 4/7] [utils] `js_to_json`: Make function less fatal (#12715) Authored by: seproDev --- test/test_utils.py | 1 + yt_dlp/utils/_utils.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 42dc7f937e..e60ceed8fd 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1260,6 +1260,7 @@ class TestUtil(unittest.TestCase): def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') + self.assertEqual(js_to_json('{a: `${e("")}`}'), '{"a": "\\"e\\"(\\"\\")"}') def test_js_to_json_template_literal(self): self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"') diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 0140acaa3a..24525560ef 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2767,7 +2767,8 @@ def js_to_json(code, vars={}, *, strict=False): def template_substitute(match): evaluated = js_to_json(match.group(1), vars, strict=strict) if evaluated[0] == '"': - return json.loads(evaluated) + with contextlib.suppress(json.JSONDecodeError): + return json.loads(evaluated) return evaluated def fix_kv(m): From 5086d4aed6aeb3908c62f49e2d8f74cc0cb05110 Mon Sep 17 00:00:00 2001 From: fireattack Date: Tue, 25 Mar 2025 06:24:09 +0800 Subject: [PATCH 5/7] [ie/generic] Fix MPD base URL parsing (#12718) Closes #12709 Authored by: fireattack --- yt_dlp/extractor/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 67c224e502..c144069b3f 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -16,6 +16,7 @@ from ..utils import ( MEDIA_EXTENSIONS, ExtractorError, UnsupportedError, + base_url, determine_ext, determine_protocol, dict_get, @@ -2531,7 +2532,7 @@ class GenericIE(InfoExtractor): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles( doc, - mpd_base_url=full_response.url.rpartition('/')[0], + mpd_base_url=base_url(full_response.url), mpd_url=url) info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None self._extra_manifest_info(info_dict, url) From 3396eb50dcd245b49c0f4aecd6e80ec914095d16 Mon Sep 17 00:00:00 2001 From: Subrat Lima <74418100+subrat-lima@users.noreply.github.com> Date: Tue, 25 Mar 2025 03:56:45 +0530 Subject: [PATCH 6/7] [ie/17live:vod] Add extractor (#12723) Closes #12570 Authored by: subrat-lima --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/ichinanalive.py | 58 +++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index eb914d2eb7..28d410fa86 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -839,6 +839,7 @@ from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveClipIE, IchinanaLiveIE, + IchinanaLiveVODIE, ) from .idolplus import IdolPlusIE from .ign import ( diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index a37cfe77bd..475d33593d 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,5 +1,13 @@ + from .common import InfoExtractor -from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate +from ..utils import ( + ExtractorError, + int_or_none, + str_or_none, + traverse_obj, + unified_strdate, + url_or_none, +) class IchinanaLiveIE(InfoExtractor): @@ -157,3 +165,51 @@ class IchinanaLiveClipIE(InfoExtractor): 'description': view_data.get('caption'), 'upload_date': unified_strdate(str_or_none(view_data.get('createdAt'))), } + + +class IchinanaLiveVODIE(InfoExtractor): + IE_NAME = '17live:vod' + _VALID_URL = r'https?://(?:www\.)?17\.live/ja/vod/[^/?#]+/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://17.live/ja/vod/27323042/2cf84520-e65e-4b22-891e-1d3a00b0f068', + 'md5': '3299b930d7457b069639486998a89580', + 'info_dict': { + 'id': '2cf84520-e65e-4b22-891e-1d3a00b0f068', + 'ext': 'mp4', + 'title': 'md5:b5f8cbf497d54cc6a60eb3b480182f01', + 'uploader': 'md5:29fb12122ab94b5a8495586e7c3085a5', + 'uploader_id': '27323042', + 'channel': '🌟オールナイトニッポン アーカイブ🌟', + 'channel_id': '2b4f85f1-d61e-429d-a901-68d32bdd8645', + 'like_count': int, + 'view_count': int, + 'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)', + 'duration': 549, + 'description': 'md5:116f326579700f00eaaf5581aae1192e', + 'timestamp': 1741058645, + 'upload_date': '20250304', + }, + }, { + 'url': 'https://17.live/ja/vod/27323042/0de11bac-9bea-40b8-9eab-0239a7d88079', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json(f'https://wap-api.17app.co/api/v1/vods/{video_id}', video_id) + + return traverse_obj(json_data, { + 'id': ('vodID', {str}), + 'title': ('title', {str}), + 'formats': ('vodURL', {lambda x: self._extract_m3u8_formats(x, video_id)}), + 'uploader': ('userInfo', 'displayName', {str}), + 'uploader_id': ('userInfo', 'roomID', {int}, {str_or_none}), + 'channel': ('userInfo', 'name', {str}), + 'channel_id': ('userInfo', 'userID', {str}), + 'like_count': ('likeCount', {int_or_none}), + 'view_count': ('viewCount', {int_or_none}), + 'thumbnail': ('imageURL', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'description': ('description', {str}), + 'timestamp': ('createdAt', {int_or_none}), + }) From 86ab79e1a5182092321102adf6ca34195803b878 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 24 Mar 2025 17:38:22 -0500 Subject: [PATCH 7/7] [ie] Fix sorting of HLS audio formats by `GROUP-ID` (#12714) Closes #11178 Authored by: bashonly --- test/test_InfoExtractor.py | 14 ++++++++++---- yt_dlp/extractor/common.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 54f35ef552..c6ff6209a8 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -638,6 +638,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'img_bipbop_adv_example_fmp4', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', [{ + # 60kbps (bitrate not provided in m3u8); sorted as worst because it's grouped with lowest bitrate video track 'format_id': 'aud1-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', @@ -645,22 +646,27 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 0, }, { - 'format_id': 'aud2-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', + # 192kbps (bitrate not provided in m3u8) + 'format_id': 'aud3-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 1, }, { - 'format_id': 'aud3-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', + # 384kbps (bitrate not provided in m3u8); sorted as best because it's grouped with the highest bitrate video track + 'format_id': 'aud2-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', + 'source_preference': 2, }, { 'format_id': '530', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0119111816..4c1bc4cf47 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -78,6 +78,7 @@ from ..utils import ( parse_iso8601, parse_m3u8_attributes, parse_resolution, + qualities, sanitize_url, smuggle_url, str_or_none, @@ -2177,6 +2178,8 @@ class InfoExtractor: media_url = media.get('URI') if media_url: manifest_url = format_url(media_url) + is_audio = media_type == 'AUDIO' + is_alternate = media.get('DEFAULT') == 'NO' or media.get('AUTOSELECT') == 'NO' formats.extend({ 'format_id': join_nonempty(m3u8_id, group_id, name, idx), 'format_note': name, @@ -2189,7 +2192,11 @@ class InfoExtractor: 'preference': preference, 'quality': quality, 'has_drm': has_drm, - 'vcodec': 'none' if media_type == 'AUDIO' else None, + 'vcodec': 'none' if is_audio else None, + # Alternate audio formats (e.g. audio description) should be deprioritized + 'source_preference': -2 if is_audio and is_alternate else None, + # Save this to assign source_preference based on associated video stream + '_audio_group_id': group_id if is_audio and not is_alternate else None, } for idx in _extract_m3u8_playlist_indices(manifest_url)) def build_stream_name(): @@ -2284,6 +2291,8 @@ class InfoExtractor: # ignore references to rendition groups and treat them # as complete formats. if audio_group_id and codecs and f.get('vcodec') != 'none': + # Save this to determine quality of audio formats that only have a GROUP-ID + f['_audio_group_id'] = audio_group_id audio_group = groups.get(audio_group_id) if audio_group and audio_group[0].get('URI'): # TODO: update acodec for audio only formats with @@ -2306,6 +2315,28 @@ class InfoExtractor: formats.append(http_f) last_stream_inf = {} + + # Some audio-only formats only have a GROUP-ID without any other quality/bitrate/codec info + # Each audio GROUP-ID corresponds with one or more video formats' AUDIO attribute + # For sorting purposes, set source_preference based on the quality of the video formats they are grouped with + # See https://github.com/yt-dlp/yt-dlp/issues/11178 + audio_groups_by_quality = orderedSet(f['_audio_group_id'] for f in sorted( + traverse_obj(formats, lambda _, v: v.get('vcodec') != 'none' and v['_audio_group_id']), + key=lambda x: (x.get('tbr') or 0, x.get('width') or 0))) + audio_quality_map = { + audio_groups_by_quality[0]: 'low', + audio_groups_by_quality[-1]: 'high', + } if len(audio_groups_by_quality) > 1 else None + audio_preference = qualities(audio_groups_by_quality) + for fmt in formats: + audio_group_id = fmt.pop('_audio_group_id', None) + if not audio_quality_map or not audio_group_id or fmt.get('vcodec') != 'none': + continue + # Use source_preference since quality and preference are set by params + fmt['source_preference'] = audio_preference(audio_group_id) + fmt['format_note'] = join_nonempty( + fmt.get('format_note'), audio_quality_map.get(audio_group_id), delim=', ') + return formats, subtitles def _extract_m3u8_vod_duration(