From 830fcde8d58927d658910e8f71f70e5f51ad2306 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:12:39 +0800 Subject: [PATCH 1/6] [ie/bilibili] URL Redirect (yt-dlp#13924) --- yt_dlp/extractor/bilibili.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index d00ac63176..b7696e93e2 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -644,7 +644,19 @@ class BiliBiliIE(BilibiliBaseIE): headers['Referer'] = url - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) + if not initial_state: + query = {} + if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): + query["bvid"] = f"BV{groups.group("id")}" + elif groups := re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): + query["aid"] = groups.group("id") + if query: + ep_url = traverse_obj( + self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', + query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) + if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): + return BiliBiliBangumiIE(self._downloader).extract(ep_url) if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() From ffb6d6a28b1803e2ba6d5cd869b13b4791a0747a Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:17:48 +0800 Subject: [PATCH 2/6] Add fallback ExtractorError --- yt_dlp/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b7696e93e2..f9f61830c0 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -657,6 +657,7 @@ class BiliBiliIE(BilibiliBaseIE): 
query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): return BiliBiliBangumiIE(self._downloader).extract(ep_url) + raise ExtractorError('Unable to extract initial state!') if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() From f01d2eba9c0e69be0647556116c9c99feae643be Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:22:57 +0800 Subject: [PATCH 3/6] Fix f-string quote --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index f9f61830c0..8d6139854b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -648,7 +648,7 @@ class BiliBiliIE(BilibiliBaseIE): if not initial_state: query = {} if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): - query["bvid"] = f"BV{groups.group("id")}" + query["bvid"] = f"BV{groups.group('id')}" elif groups := re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): query["aid"] = groups.group("id") if query: From 21d8e3d22ee2b66425c475c00bc43222e98dde15 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:24:42 +0800 Subject: [PATCH 4/6] Fix single quote convention --- yt_dlp/extractor/bilibili.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8d6139854b..bad566300d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -647,10 +647,10 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: query = {} - if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): - query["bvid"] = f"BV{groups.group('id')}" - elif groups := 
re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): - query["aid"] = groups.group("id") + if groups := re.search(r'[bB][vV](?P<id>[^/?#&]+)', url): + query['bvid'] = 'BV' + groups.group('id') + elif groups := re.search(r'[aA][vV](?P<id>[^/?#&]+)', url): + query['aid'] = groups.group('id') if query: ep_url = traverse_obj( self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', From 82abbe0628fd3c70714749c7c1a2d7c44c2b2f89 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:27:15 +0800 Subject: [PATCH 5/6] Fix autopep8 --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index bad566300d..7867b7fa4d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -654,7 +654,7 @@ class BiliBiliIE(BilibiliBaseIE): if query: ep_url = traverse_obj( self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', - query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) + query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): return BiliBiliBangumiIE(self._downloader).extract(ep_url) raise ExtractorError('Unable to extract initial state!') From cc5a9c75f2bbaa00202bc6f7bc8975bc177da2ea Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sun, 17 Aug 2025 09:32:54 +0800 Subject: [PATCH 6/6] Fix per review --- yt_dlp/extractor/bilibili.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 7867b7fa4d..9e4bdb871c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -304,7 +304,7 @@ class BilibiliBaseIE(InfoExtractor): class BiliBiliIE(BilibiliBaseIE): - _VALID_URL = 
r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)(?P<prefix>[aAbB][vV])(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bilibili.com/video/BV13x41117TL', @@ -636,7 +636,7 @@ class BiliBiliIE(BilibiliBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id, prefix = self._match_valid_url(url).group('id', 'prefix') headers = self.geo_verification_headers() webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers) if not self._match_valid_url(urlh.url): @@ -646,18 +646,19 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: - query = {} - if groups := re.search(r'[bB][vV](?P<id>[^/?#&]+)', url): - query['bvid'] = 'BV' + groups.group('id') - elif groups := re.search(r'[aA][vV](?P<id>[^/?#&]+)', url): - query['aid'] = groups.group('id') - if query: - ep_url = traverse_obj( - self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', - query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) - if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): - return BiliBiliBangumiIE(self._downloader).extract(ep_url) - raise ExtractorError('Unable to extract initial state!') + query = {'platform': 'web'} + prefix = prefix.upper() + if prefix == 'BV': + query['bvid'] = prefix + video_id + elif prefix == 'AV': + query['aid'] = video_id + detail = self._download_json( + 'https://api.bilibili.com/x/web-interface/wbi/view/detail', video_id, + query=self._sign_wbi(query, video_id), headers=headers) + new_url = traverse_obj(detail, ('data', 'View', 'redirect_url', {url_or_none})) + if new_url and BiliBiliBangumiIE.suitable(new_url): + return self.url_result(new_url, BiliBiliBangumiIE) + raise ExtractorError('Unable to extract 
initial state') if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required()