From 830fcde8d58927d658910e8f71f70e5f51ad2306 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:12:39 +0800 Subject: [PATCH 01/11] [ie/bilibili] URL Redirect (yt-dlp#13924) --- yt_dlp/extractor/bilibili.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index d00ac63176..b7696e93e2 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -644,7 +644,19 @@ class BiliBiliIE(BilibiliBaseIE): headers['Referer'] = url - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) + if not initial_state: + query = {} + if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): + query["bvid"] = f"BV{groups.group("id")}" + elif groups := re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): + query["aid"] = groups.group("id") + if query: + ep_url = traverse_obj( + self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', + query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) + if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): + return BiliBiliBangumiIE(self._downloader).extract(ep_url) if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() From ffb6d6a28b1803e2ba6d5cd869b13b4791a0747a Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:17:48 +0800 Subject: [PATCH 02/11] Add fallback ExtractorError --- yt_dlp/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b7696e93e2..f9f61830c0 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -657,6 +657,7 @@ class 
BiliBiliIE(BilibiliBaseIE): query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): return BiliBiliBangumiIE(self._downloader).extract(ep_url) + raise ExtractorError('Unable to extract initial state!') if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() From f01d2eba9c0e69be0647556116c9c99feae643be Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:22:57 +0800 Subject: [PATCH 03/11] Fix f-string quote --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index f9f61830c0..8d6139854b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -648,7 +648,7 @@ class BiliBiliIE(BilibiliBaseIE): if not initial_state: query = {} if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): - query["bvid"] = f"BV{groups.group("id")}" + query["bvid"] = f"BV{groups.group('id')}" elif groups := re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): query["aid"] = groups.group("id") if query: From 21d8e3d22ee2b66425c475c00bc43222e98dde15 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:24:42 +0800 Subject: [PATCH 04/11] Fix single quote convention --- yt_dlp/extractor/bilibili.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8d6139854b..bad566300d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -647,10 +647,10 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: query = {} - if groups := re.search(r"[bB][vV](?P<id>[^/?#&]+)", url): - query["bvid"] = f"BV{groups.group('id')}" - elif 
groups := re.search(r"[aA][vV](?P<id>[^/?#&]+)", url): - query["aid"] = groups.group("id") + if groups := re.search(r'[bB][vV](?P<id>[^/?#&]+)', url): + query['bvid'] = 'BV' + groups.group('id') + elif groups := re.search(r'[aA][vV](?P<id>[^/?#&]+)', url): + query['aid'] = groups.group('id') if query: ep_url = traverse_obj( self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', From 82abbe0628fd3c70714749c7c1a2d7c44c2b2f89 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:27:15 +0800 Subject: [PATCH 05/11] Fix autopep8 --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index bad566300d..7867b7fa4d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -654,7 +654,7 @@ class BiliBiliIE(BilibiliBaseIE): if query: ep_url = traverse_obj( self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', - query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) + query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): return BiliBiliBangumiIE(self._downloader).extract(ep_url) raise ExtractorError('Unable to extract initial state!') From cc5a9c75f2bbaa00202bc6f7bc8975bc177da2ea Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sun, 17 Aug 2025 09:32:54 +0800 Subject: [PATCH 06/11] Fix per review --- yt_dlp/extractor/bilibili.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 7867b7fa4d..9e4bdb871c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -304,7 +304,7 @@ class BilibiliBaseIE(InfoExtractor): class BiliBiliIE(BilibiliBaseIE): - 
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)(?P<prefix>[aAbB][vV])(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bilibili.com/video/BV13x41117TL', @@ -636,7 +636,7 @@ class BiliBiliIE(BilibiliBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id, prefix = self._match_valid_url(url).group('id', 'prefix') headers = self.geo_verification_headers() webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers) if not self._match_valid_url(urlh.url): @@ -646,18 +646,19 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: - query = {} - if groups := re.search(r'[bB][vV](?P<id>[^/?#&]+)', url): - query['bvid'] = 'BV' + groups.group('id') - elif groups := re.search(r'[aA][vV](?P<id>[^/?#&]+)', url): - query['aid'] = groups.group('id') - if query: - ep_url = traverse_obj( - self._download_json('https://api.bilibili.com/x/web-interface/wbi/view/detail', 'vid', - query=self._sign_wbi(query, 'vid'), headers=headers), ('data', 'View', 'redirect_url')) - if ep_url and BiliBiliBangumiIE._match_valid_url(ep_url): - return BiliBiliBangumiIE(self._downloader).extract(ep_url) - raise ExtractorError('Unable to extract initial state!') + query = {'platform': 'web'} + prefix = prefix.upper() + if prefix == 'BV': + query['bvid'] = prefix + video_id + elif prefix == 'AV': + query['aid'] = video_id + detail = self._download_json( + 'https://api.bilibili.com/x/web-interface/wbi/view/detail', video_id, + query=self._sign_wbi(query, video_id), headers=headers) + new_url = traverse_obj(detail, ('data', 'View', 'redirect_url', {url_or_none})) + if new_url and BiliBiliBangumiIE.suitable(new_url): + return self.url_result(new_url, BiliBiliBangumiIE) + raise ExtractorError('Unable 
to extract initial state') if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() From 9e84a71aa05c18383f9160a556c6aee572f0ae2f Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sun, 17 Aug 2025 20:24:13 +0800 Subject: [PATCH 07/11] Fixes #12476 --- yt_dlp/extractor/bilibili.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 9e4bdb871c..0f88af9657 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -646,6 +646,9 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: + if traverse_obj(self._search_json( + r'\s*window\._riskdata_\s*=', webpage, 'risk data', video_id, default=None), 'v_voucher'): + raise ExtractorError('You are downloading too frequently, please wait and try later', expected=True) query = {'platform': 'web'} prefix = prefix.upper() if prefix == 'BV': From 54fa1c4b42c5f6346b8b7639ecad7f108763141a Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Sun, 17 Aug 2025 21:36:39 +0800 Subject: [PATCH 08/11] Add redirecting test and note --- yt_dlp/extractor/bilibili.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0f88af9657..4d880daf8b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -580,7 +580,27 @@ class BiliBiliIE(BilibiliBaseIE): 'duration': 1183.957, 'timestamp': 1571648124, 'upload_date': '20191021', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', + }, + }, { + 'note': 'redirect from aid to bangumi link via redirect_url', + 'url': 'https://www.bilibili.com/video/av114868162141203', + 'info_dict': { + 'id': '1933368', + 
'title': 'PV 引爆变革的起点', + 'ext': 'mp4', + 'duration': 63.139, + 'series': '时光代理人', + 'series_id': '5183', + 'season': '第三季', + 'season_number': 4, + 'season_id': '105212', + 'episode': '引爆变革的起点', + 'episode_number': 1, + 'episode_id': '1933368', + 'timestamp': 1752849001, + 'upload_date': '20250718', + 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', }, }, { 'note': 'video has subtitles, which requires login', @@ -657,6 +677,7 @@ class BiliBiliIE(BilibiliBaseIE): query['aid'] = video_id detail = self._download_json( 'https://api.bilibili.com/x/web-interface/wbi/view/detail', video_id, + note='Downloading redirection URL', errnote='Failed to download redirection URL', query=self._sign_wbi(query, video_id), headers=headers) new_url = traverse_obj(detail, ('data', 'View', 'redirect_url', {url_or_none})) if new_url and BiliBiliBangumiIE.suitable(new_url): From 34631c50f1657241b42e4493a98e99d708cab832 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Mon, 18 Aug 2025 13:48:04 +0800 Subject: [PATCH 09/11] Fix per suggestion Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/bilibili.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 4d880daf8b..d5666418b3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -666,9 +666,8 @@ class BiliBiliIE(BilibiliBaseIE): initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id, default=None) if not initial_state: - if traverse_obj(self._search_json( - r'\s*window\._riskdata_\s*=', webpage, 'risk data', video_id, default=None), 'v_voucher'): - raise ExtractorError('You are downloading too frequently, please wait and try later', expected=True) + if self._search_json(r'\bwindow\._riskdata_\s*=', webpage, 'risk', video_id, default={}).get('v_voucher'): + raise ExtractorError('You have exceeded the 
rate limit. Try again later', expected=True) query = {'platform': 'web'} prefix = prefix.upper() if prefix == 'BV': From 7d68e8e5c00dc3549beaa5d13c755a64146f4d11 Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Mon, 18 Aug 2025 13:54:47 +0800 Subject: [PATCH 10/11] Add BVID redirect test --- yt_dlp/extractor/bilibili.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index d5666418b3..e603b05908 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -602,6 +602,26 @@ class BiliBiliIE(BilibiliBaseIE): 'upload_date': '20250718', 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', }, + }, { + 'note': 'redirect from bvid to bangumi link via redirect_url', + 'url': 'https://www.bilibili.com/video/BV11s411S7zX', + 'info_dict': { + 'id': '28637', + 'title': '12 波澜和动荡的日常结局', + 'ext': 'mp4', + 'duration': 1383.184, + 'series': '路人女主的养成方法', + 'series_id': '1037', + 'season': '第一季', + 'season_number': 1, + 'season_id': '1512', + 'episode': '波澜和动荡的日常结局', + 'episode_number': 12, + 'episode_id': '28637', + 'timestamp': 1427392200, + 'upload_date': '20150326', + 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', + }, }, { 'note': 'video has subtitles, which requires login', 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', From a1ba59111737ca1390e4ab3da1c7ac0b7bbb39ab Mon Sep 17 00:00:00 2001 From: Junyi Lou <15688661+junyilou@users.noreply.github.com> Date: Mon, 18 Aug 2025 13:59:05 +0800 Subject: [PATCH 11/11] Reuse existing bvid test --- yt_dlp/extractor/bilibili.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index e603b05908..cd9bf6b165 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -563,7 +563,7 @@ class BiliBiliIE(BilibiliBaseIE): }, }], }, { - 'note': '301 redirect to bangumi link', + 
'note': 'redirect from bvid to bangumi link via redirect_url', 'url': 'https://www.bilibili.com/video/BV1TE411f7f1', 'info_dict': { 'id': '288525', @@ -602,26 +602,6 @@ class BiliBiliIE(BilibiliBaseIE): 'upload_date': '20250718', 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', }, - }, { - 'note': 'redirect from bvid to bangumi link via redirect_url', - 'url': 'https://www.bilibili.com/video/BV11s411S7zX', - 'info_dict': { - 'id': '28637', - 'title': '12 波澜和动荡的日常结局', - 'ext': 'mp4', - 'duration': 1383.184, - 'series': '路人女主的养成方法', - 'series_id': '1037', - 'season': '第一季', - 'season_number': 1, - 'season_id': '1512', - 'episode': '波澜和动荡的日常结局', - 'episode_number': 12, - 'episode_id': '28637', - 'timestamp': 1427392200, - 'upload_date': '20150326', - 'thumbnail': r're:https?://.*\.(jpg|jpeg|png)$', - }, }, { 'note': 'video has subtitles, which requires login', 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',