Merge branch 'yt-dlp:master' into streaks

4 months ago · 8504c6907d
parent 1058801662 86ab79e1a5
commit 8504c6907d
10 changed files with 312 additions and 12 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -638,6 +638,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                'img_bipbop_adv_example_fmp4',
                'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
                [{
+                    # 60kbps (bitrate not provided in m3u8); sorted as worst because it's grouped with the lowest bitrate video track
                    'format_id': 'aud1-English',
                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8',
                    'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
@ -645,22 +646,27 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                    'audio_ext': 'mp4',
+                    'source_preference': 0,
                }, {
-                    'format_id': 'aud2-English',
-                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8',
+                    # 192kbps (bitrate not provided in m3u8)
+                    'format_id': 'aud3-English',
+                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8',
                    'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
                    'language': 'en',
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                    'audio_ext': 'mp4',
+                    'source_preference': 1,
                }, {
-                    'format_id': 'aud3-English',
-                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8',
+                    # 384kbps (bitrate not provided in m3u8); sorted as best because it's grouped with the highest bitrate video track
+                    'format_id': 'aud2-English',
+                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8',
                    'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
                    'language': 'en',
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                    'audio_ext': 'mp4',
+                    'source_preference': 2,
                }, {
                    'format_id': '530',
                    'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -1260,6 +1260,7 @@ class TestUtil(unittest.TestCase):
    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+        self.assertEqual(js_to_json('{a: `${e("")}`}'), '{"a": "\\"e\\"(\\"\\")"}')

    def test_js_to_json_template_literal(self):
        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -83,6 +83,11 @@ _SIG_TESTS = [
        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
        'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+    ),
 ]

 _NSIG_TESTS = [
@ -234,6 +239,10 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
        'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
+    ),
 ]


--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -839,6 +839,7 @@ from .icareus import IcareusIE
 from .ichinanalive import (
    IchinanaLiveClipIE,
    IchinanaLiveIE,
+    IchinanaLiveVODIE,
 )
 from .idolplus import IdolPlusIE
 from .ign import (
@ -2393,6 +2394,12 @@ from .voxmedia import (
    VoxMediaIE,
    VoxMediaVolumeIE,
 )
+from .vrsquare import (
+    VrSquareChannelIE,
+    VrSquareIE,
+    VrSquareSearchIE,
+    VrSquareSectionIE,
+)
 from .vrt import (
    VRTIE,
    DagelijkseKostIE,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -78,6 +78,7 @@ from ..utils import (
    parse_iso8601,
    parse_m3u8_attributes,
    parse_resolution,
+    qualities,
    sanitize_url,
    smuggle_url,
    str_or_none,
@ -2177,6 +2178,8 @@ class InfoExtractor:
            media_url = media.get('URI')
            if media_url:
                manifest_url = format_url(media_url)
+                is_audio = media_type == 'AUDIO'
+                is_alternate = media.get('DEFAULT') == 'NO' or media.get('AUTOSELECT') == 'NO'
                formats.extend({
                    'format_id': join_nonempty(m3u8_id, group_id, name, idx),
                    'format_note': name,
@ -2189,7 +2192,11 @@ class InfoExtractor:
                    'preference': preference,
                    'quality': quality,
                    'has_drm': has_drm,
-                    'vcodec': 'none' if media_type == 'AUDIO' else None,
+                    'vcodec': 'none' if is_audio else None,
+                    # Alternate audio formats (e.g. audio description) should be deprioritized
+                    'source_preference': -2 if is_audio and is_alternate else None,
+                    # Save this to assign source_preference based on the associated video stream
+                    '_audio_group_id': group_id if is_audio and not is_alternate else None,
                } for idx in _extract_m3u8_playlist_indices(manifest_url))

        def build_stream_name():
@ -2284,6 +2291,8 @@ class InfoExtractor:
                    # ignore references to rendition groups and treat them
                    # as complete formats.
                    if audio_group_id and codecs and f.get('vcodec') != 'none':
+                        # Save this to determine quality of audio formats that only have a GROUP-ID
+                        f['_audio_group_id'] = audio_group_id
                        audio_group = groups.get(audio_group_id)
                        if audio_group and audio_group[0].get('URI'):
                            # TODO: update acodec for audio only formats with
@ -2306,6 +2315,28 @@ class InfoExtractor:
                        formats.append(http_f)

                last_stream_inf = {}
+
+        # Some audio-only formats have only a GROUP-ID and no other quality/bitrate/codec info.
+        # Each audio GROUP-ID corresponds to the AUDIO attribute of one or more video formats.
+        # For sorting purposes, set source_preference based on the quality of the video formats they are grouped with.
+        # See https://github.com/yt-dlp/yt-dlp/issues/11178
+        audio_groups_by_quality = orderedSet(f['_audio_group_id'] for f in sorted(
+            traverse_obj(formats, lambda _, v: v.get('vcodec') != 'none' and v['_audio_group_id']),
+            key=lambda x: (x.get('tbr') or 0, x.get('width') or 0)))
+        audio_quality_map = {
+            audio_groups_by_quality[0]: 'low',
+            audio_groups_by_quality[-1]: 'high',
+        } if len(audio_groups_by_quality) > 1 else None
+        audio_preference = qualities(audio_groups_by_quality)
+        for fmt in formats:
+            audio_group_id = fmt.pop('_audio_group_id', None)
+            if not audio_quality_map or not audio_group_id or fmt.get('vcodec') != 'none':
+                continue
+            # Use source_preference since quality and preference are set by params
+            fmt['source_preference'] = audio_preference(audio_group_id)
+            fmt['format_note'] = join_nonempty(
+                fmt.get('format_note'), audio_quality_map.get(audio_group_id), delim=', ')
+
        return formats, subtitles

    def _extract_m3u8_vod_duration(
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -16,6 +16,7 @@ from ..utils import (
    MEDIA_EXTENSIONS,
    ExtractorError,
    UnsupportedError,
+    base_url,
    determine_ext,
    determine_protocol,
    dict_get,
@ -2531,7 +2532,7 @@ class GenericIE(InfoExtractor):
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
                    doc,
-                    mpd_base_url=full_response.url.rpartition('/')[0],
+                    mpd_base_url=base_url(full_response.url),
                    mpd_url=url)
                info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
                self._extra_manifest_info(info_dict, url)
--- a/yt_dlp/extractor/ichinanalive.py
+++ b/yt_dlp/extractor/ichinanalive.py
@ -1,5 +1,13 @@
+
 from .common import InfoExtractor
-from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    str_or_none,
+    traverse_obj,
+    unified_strdate,
+    url_or_none,
+)


 class IchinanaLiveIE(InfoExtractor):
@ -157,3 +165,51 @@ class IchinanaLiveClipIE(InfoExtractor):
            'description': view_data.get('caption'),
            'upload_date': unified_strdate(str_or_none(view_data.get('createdAt'))),
        }
+
+
+class IchinanaLiveVODIE(InfoExtractor):
+    IE_NAME = '17live:vod'
+    _VALID_URL = r'https?://(?:www\.)?17\.live/ja/vod/[^/?#]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://17.live/ja/vod/27323042/2cf84520-e65e-4b22-891e-1d3a00b0f068',
+        'md5': '3299b930d7457b069639486998a89580',
+        'info_dict': {
+            'id': '2cf84520-e65e-4b22-891e-1d3a00b0f068',
+            'ext': 'mp4',
+            'title': 'md5:b5f8cbf497d54cc6a60eb3b480182f01',
+            'uploader': 'md5:29fb12122ab94b5a8495586e7c3085a5',
+            'uploader_id': '27323042',
+            'channel': '🌟オールナイトニッポン アーカイブ🌟',
+            'channel_id': '2b4f85f1-d61e-429d-a901-68d32bdd8645',
+            'like_count': int,
+            'view_count': int,
+            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)',
+            'duration': 549,
+            'description': 'md5:116f326579700f00eaaf5581aae1192e',
+            'timestamp': 1741058645,
+            'upload_date': '20250304',
+        },
+    }, {
+        'url': 'https://17.live/ja/vod/27323042/0de11bac-9bea-40b8-9eab-0239a7d88079',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(f'https://wap-api.17app.co/api/v1/vods/{video_id}', video_id)
+
+        return traverse_obj(json_data, {
+            'id': ('vodID', {str}),
+            'title': ('title', {str}),
+            'formats': ('vodURL', {lambda x: self._extract_m3u8_formats(x, video_id)}),
+            'uploader': ('userInfo', 'displayName', {str}),
+            'uploader_id': ('userInfo', 'roomID', {int}, {str_or_none}),
+            'channel': ('userInfo', 'name', {str}),
+            'channel_id': ('userInfo', 'userID', {str}),
+            'like_count': ('likeCount', {int_or_none}),
+            'view_count': ('viewCount', {int_or_none}),
+            'thumbnail': ('imageURL', {url_or_none}),
+            'duration': ('duration', {int_or_none}),
+            'description': ('description', {str}),
+            'timestamp': ('createdAt', {int_or_none}),
+        })
--- a/yt_dlp/extractor/vrsquare.py
+++ b/yt_dlp/extractor/vrsquare.py
@ -0,0 +1,185 @@
+import itertools
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    extract_attributes,
+    parse_duration,
+    parse_qs,
+)
+from ..utils.traversal import (
+    find_element,
+    find_elements,
+    traverse_obj,
+)
+
+
+class VrSquareIE(InfoExtractor):
+    IE_NAME = 'vrsquare'
+    IE_DESC = 'VR SQUARE'
+
+    _BASE_URL = 'https://livr.jp'
+    _VALID_URL = r'https?://livr\.jp/contents/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://livr.jp/contents/P470896661',
+        'info_dict': {
+            'id': 'P470896661',
+            'ext': 'mp4',
+            'title': 'そこ曲がったら、櫻坂？ ７年間お疲れ様！菅井友香の卒業を祝う会！前半 2022年11月6日放送分',
+            'description': 'md5:523726dc835aa8014dfe1e2b38d36cd1',
+            'duration': 1515.0,
+            'tags': 'count:2',
+            'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg',
+        },
+    }, {
+        'url': 'https://livr.jp/contents/P589523973',
+        'info_dict': {
+            'id': 'P589523973',
+            'ext': 'mp4',
+            'title': '薄闇に仰ぐ しだれ桜の妖艶',
+            'description': 'md5:a042f517b2cbb4ed6746707afec4d306',
+            'duration': 1084.0,
+            'tags': list,
+            'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg',
+        },
+        'skip': 'Paid video',
+    }, {
+        'url': 'https://livr.jp/contents/P316939908',
+        'info_dict': {
+            'id': 'P316939908',
+            'ext': 'mp4',
+            'title': '2024年5月16日（木） 「今日は誰に恋をする？」公演 小栗有以 生誕祭',
+            'description': 'md5:2110bdcf947f28bd7d06ec420e51b619',
+            'duration': 8559.0,
+            'tags': list,
+            'thumbnail': r're:https?://media\.livr\.jp/vod/img/.+\.jpg',
+        },
+        'skip': 'Premium channel subscribers only',
+    }, {
+        # Accessible only in the VR SQUARE app
+        'url': 'https://livr.jp/contents/P126481458',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        status = self._download_json(
+            f'{self._BASE_URL}/webApi/contentsStatus/{video_id}',
+            video_id, 'Checking contents status', fatal=False)
+        if traverse_obj(status, 'result_code') == '40407':
+            self.raise_login_required('Unable to access this video')
+
+        try:
+            web_api = self._download_json(
+                f'{self._BASE_URL}/webApi/play/url/{video_id}', video_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 500:
+                raise ExtractorError('VR SQUARE app-only videos are not supported', expected=True)
+            raise
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+            'description': self._html_search_meta('description', webpage),
+            'formats': self._extract_m3u8_formats(traverse_obj(web_api, (
+                'urls', ..., 'url', any)), video_id, 'mp4', fatal=False),
+            'thumbnail': self._html_search_meta('og:image', webpage),
+            **traverse_obj(webpage, {
+                'duration': ({find_element(cls='layout-product-data-time')}, {parse_duration}),
+                'tags': ({find_elements(cls='search-tag')}, ..., {clean_html}),
+            }),
+        }
+
+
+class VrSquarePlaylistBaseIE(InfoExtractor):
+    _BASE_URL = 'https://livr.jp'
+
+    def _fetch_vids(self, source, keys=()):
+        for url_path in traverse_obj(source, (
+            *keys, {find_elements(cls='video', html=True)}, ...,
+            {extract_attributes}, 'data-url', {str}, filter),
+        ):
+            yield self.url_result(
+                f'{self._BASE_URL}/contents/{url_path.removeprefix("/contents/")}', VrSquareIE)
+
+    def _entries(self, path, display_id, query=None):
+        for page in itertools.count(1):
+            ajax = self._download_json(
+                f'{self._BASE_URL}{path}', display_id,
+                f'Downloading playlist JSON page {page}',
+                query={'p': page, **(query or {})})
+            yield from self._fetch_vids(ajax, ('contents_render_list', ...))
+            if not traverse_obj(ajax, (('has_next', 'hasNext'), {bool}, any)):
+                break
+
+
+class VrSquareChannelIE(VrSquarePlaylistBaseIE):
+    IE_NAME = 'vrsquare:channel'
+
+    _VALID_URL = r'https?://livr\.jp/channel/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://livr.jp/channel/H372648599',
+        'info_dict': {
+            'id': 'H372648599',
+            'title': 'AKB48＋チャンネル',
+        },
+        'playlist_mincount': 502,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return self.playlist_result(
+            self._entries(f'/ajax/channel/{playlist_id}', playlist_id),
+            playlist_id, self._html_search_meta('og:title', webpage))
+
+
+class VrSquareSearchIE(VrSquarePlaylistBaseIE):
+    IE_NAME = 'vrsquare:search'
+
+    _VALID_URL = r'https?://livr\.jp/web-search/?\?(?:[^#]+&)?w=[^#]+'
+    _TESTS = [{
+        'url': 'https://livr.jp/web-search?w=%23%E5%B0%8F%E6%A0%97%E6%9C%89%E4%BB%A5',
+        'info_dict': {
+            'id': '#小栗有以',
+        },
+        'playlist_mincount': 60,
+    }]
+
+    def _real_extract(self, url):
+        search_query = parse_qs(url)['w'][0]
+
+        return self.playlist_result(
+            self._entries('/ajax/web-search', search_query, {'w': search_query}), search_query)
+
+
+class VrSquareSectionIE(VrSquarePlaylistBaseIE):
+    IE_NAME = 'vrsquare:section'
+
+    _VALID_URL = r'https?://livr\.jp/(?:category|headline)/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://livr.jp/category/C133936275',
+        'info_dict': {
+            'id': 'C133936275',
+            'title': 'そこ曲がったら、櫻坂？VR',
+        },
+        'playlist_mincount': 308,
+    }, {
+        'url': 'https://livr.jp/headline/A296449604',
+        'info_dict': {
+            'id': 'A296449604',
+            'title': 'AKB48 アフターVR',
+        },
+        'playlist_mincount': 22,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return self.playlist_result(
+            self._fetch_vids(webpage), playlist_id, self._html_search_meta('og:title', webpage))
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@ -2176,10 +2176,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        """Returns tuple of strings: variable assignment code, variable name, variable value code"""
        return self._search_regex(
            r'''(?x)
-                \'use\s+strict\';\s*
+                (?P<q1>["\'])use\s+strict(?P=q1);\s*
                (?P<code>
                    var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
-                    (?P<value>"(?:[^"\\]|\\.)+"\.split\("[^"]+"\))
+                    (?P<value>
+                        (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
+                        \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
+                    )
                )[;,]
            ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))

@ -2187,7 +2190,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        global_var, varname, _ = self._extract_player_js_global_var(full_code)
        if global_var:
            self.write_debug(f'Prepending n function code with global array variable "{varname}"')
-            code = global_var + ', ' + code
+            code = global_var + '; ' + code
        else:
            self.write_debug('No global array variable found in player JS')
        return argnames, re.sub(
@ -2196,7 +2199,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    def _extract_n_function_code(self, video_id, player_url):
        player_id = self._extract_player_info(player_url)
-        func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.21')
+        func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.24')
        jscode = func_code or self._load_player(video_id, player_url)
        jsi = JSInterpreter(jscode)

--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -2767,7 +2767,8 @@ def js_to_json(code, vars={}, *, strict=False):
    def template_substitute(match):
        evaluated = js_to_json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
-            return json.loads(evaluated)
+            with contextlib.suppress(json.JSONDecodeError):
+                return json.loads(evaluated)
        return evaluated

    def fix_kv(m):