diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index fcebc4511..717932448 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1530,38 +1530,44 @@ class BiliBiliSearchAllIE(SearchInfoExtractor): def _search_results(self, query): headers = self.geo_verification_headers() + page_size = 50 live_room_prefix = 'https://live.bilibili.com/' bili_user_prefix = 'https://space.bilibili.com/' if not self._get_cookies('https://api.bilibili.com').get('buvid3'): self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc') for page_num in itertools.count(1): + query_params = { + 'keyword': query, + 'page': page_num, + 'dynamic_offset': (page_num - 1) * page_size, + 'platform': 'pc', + } + api_url = r'https://api.bilibili.com/x/web-interface/search/all/v2' try: search_all_result = self._download_json( - r'https://api.bilibili.com/x/web-interface/search/all/v2', - video_id=query, query={ - 'keyword': query, - 'page': page_num, - }, headers=headers) + api_url, video_id=query, query=query_params, headers=headers) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 412: raise ExtractorError('Request is blocked by server (-412).', expected=True) + raise status_code = search_all_result['code'] if status_code == -400: raise ExtractorError('Invalid request (-400).', expected=True) result_list = search_all_result['data'].get('result') - if result_list is None: + if not result_list: self.write_debug(f'Response: {search_all_result}') raise ExtractorError(f'Result not found in the response ({status_code}).', expected=True) for result_type_dict in result_list: for result_data in result_type_dict['data']: - if result_data['type'] == 'video': + result_type = result_data.get('type') + if result_type == 'video': yield self.url_result(result_data['arcurl']) - elif result_data['type'] == 'live_room': + elif result_type == 'live_room': yield self.url_result(live_room_prefix + str(result_data['roomid'])) - elif result_data['type'] in ['media_ft', 'media_bangumi']: + elif result_type in ['media_ft', 'media_bangumi']: yield self.url_result(result_data['url']) - elif result_data['type'] == 'bili_user': + elif result_type == 'bili_user': yield self.url_result(bili_user_prefix + str(result_data['mid'])) @@ -2315,7 +2321,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): _VALID_URL = r'https?://search\.bilibili\.com/(?Pall|video|bangumi|pgc|live|upuser).*' _TESTS = [{ 'url': r'https://search.bilibili.com/all?keyword=yt+-+dlp+%E4%B8%8B%E8%BD%BD%E5%99%A8', - 'playlist_count': 20, + 'playlist_count': 36, 'info_dict': { 'id': 'yt - dlp 下载器', 'title': 'yt - dlp 下载器', @@ -2330,7 +2336,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): 'skip': 'geo-restricted', }, { 'url': r'https://search.bilibili.com/video?keyword=%E8%AE%A9%E5%AD%90%E5%BC%B9%E9%A3%9E&from_source=webtop_search&spm_id_from=333.1007&search_source=5&order=dm&duration=4&tids=181&page=3&o=72', - 'playlist_mincount': 5, + 'playlist_count': 4, 'info_dict': { 'id': '让子弹飞', 'title': '让子弹飞', @@ -2338,15 +2344,20 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): }] def _real_extract(self, url): + live_room_prefix = 'https://live.bilibili.com/' + bili_user_prefix = 'https://space.bilibili.com/' headers = self.geo_verification_headers() entries = [] params = parse_qs(url) - query = {} + query = { + 'platform': 'pc', + 'page_size': 36, + } if not self._get_cookies('https://api.bilibili.com').get('buvid3'): self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc') search_type = self._match_valid_url(url).group('type') raw_playlist_id = traverse_obj(params, ('keyword', 0)) - if raw_playlist_id is None: + if not raw_playlist_id: raise ExtractorError('Please specify the keyword to search for!', expected=True) playlist_id = urllib.parse.unquote_plus(raw_playlist_id) search_type_mapping = { @@ -2355,6 +2366,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): 'pgc': 'media_ft', 'live': 'live_room', 'upuser': 'bili_user', + 'all': 'video', # 'all' search calls video search after page 1 } valid_params = [ 'keyword', @@ -2362,7 +2374,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): 'order', 'duration', 'tids', - 'search_type', + 'search_type', # Only when searching for live_room or live_user 'order_sort', 'user_type', ] @@ -2370,11 +2382,17 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): param_value = traverse_obj(params, (valid_param, 0)) if param_value is not None: query[valid_param] = param_value - live_room_prefix = 'https://live.bilibili.com/' - bili_user_prefix = 'https://space.bilibili.com/' + page_num = int(query.get('page', 1)) + param_offset = int_or_none(traverse_obj(params, ('o', 0))) + if page_num == 1: + query['dynamic_offset'] = 0 + elif param_offset is not None: + query['dynamic_offset'] = param_offset + else: + query['dynamic_offset'] = query['page_size'] * (page_num - 1) if search_type == 'live' and traverse_obj(params, ('search_type', 0)) == 'live_user': raise ExtractorError('Live users are not downloadable!', expected=True) - if search_type == 'all': + if search_type == 'all' and page_num == 1: try: search_all_result = self._download_json( r'https://api.bilibili.com/x/web-interface/search/all/v2', @@ -2387,18 +2405,20 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): if status_code == -400: raise ExtractorError('Invalid request (-400).', expected=True) result_list = search_all_result['data'].get('result') - if result_list is None: + if not result_list: + self.write_debug(f'Response: {search_all_result}') raise ExtractorError(f'Result not found in the response ({status_code}).', expected=True) for result_type_dict in result_list: for result_data in result_type_dict['data']: - if result_data['type'] == 'video': + result_type = result_data.get('type') + if result_type == 'video': entries.append(self.url_result(result_data['arcurl'])) - elif result_data['type'] == 'live_room': + elif result_type == 'live_room': entries.append(self.url_result(live_room_prefix + str(result_data['roomid']))) - elif result_data['type'] in ['media_ft', 'media_bangumi']: + elif result_type in ['media_ft', 'media_bangumi']: entries.append(self.url_result(result_data['url'])) - elif result_data['type'] == 'bili_user': + elif result_type == 'bili_user': entries.append(self.url_result(bili_user_prefix + str(result_data['mid']))) else: try: @@ -2416,18 +2436,20 @@ class BiliBiliSearchPageIE(BilibiliBaseIE): if status_code == -400: raise ExtractorError('Invalid request (-400).') result_list = search_type_result['data'].get('result') - if result_list is None: + if not result_list: + self.write_debug(f'Response: {search_type_result}') raise ExtractorError( f'Result not found in the response ({status_code}). ' 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) for result_data in result_list: - if result_data['type'] == 'video': + result_type = result_data.get('type') + if result_type == 'video': entries.append(self.url_result(result_data['arcurl'])) - elif result_data['type'] == 'live_room': + elif result_type == 'live_room': entries.append(self.url_result(live_room_prefix + str(result_data['roomid']))) - elif result_data['type'] in ['media_ft', 'media_bangumi']: + elif result_type in ['media_ft', 'media_bangumi']: entries.append(self.url_result(result_data['url'])) - elif result_data['type'] == 'bili_user': + elif result_type == 'bili_user': entries.append(self.url_result(bili_user_prefix + str(result_data['mid']))) return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_id)