From 8e3f8065af1415caeff788c5c430703dd0d8f576 Mon Sep 17 00:00:00 2001 From: "Arseniy D." <110495618+AzartX47@users.noreply.github.com> Date: Fri, 15 Aug 2025 17:07:35 -1000 Subject: [PATCH] [ie/weibo] Fix extractors (#14012) Closes #14012 Authored by: AzartX47, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/weibo.py | 43 +++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index 420ac38299..d5210850cc 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -52,13 +52,16 @@ class WeiboBaseIE(InfoExtractor): '_rand': random.random(), }) - def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): - # XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error - webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) + def _weibo_download_json(self, url, video_id, note='Downloading JSON metadata', data=None, headers=None, query=None): + headers = { + 'Referer': 'https://weibo.com/', + **(headers or {}), + } + webpage, urlh = self._download_webpage_handle(url, video_id, note=note, data=data, headers=headers, query=query) if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': self._update_visitor_cookies(urlh.url, video_id) - webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs) - return self._parse_json(webpage, video_id, fatal=fatal) + webpage = self._download_webpage(url, video_id, note=note, data=data, headers=headers, query=query) + return self._parse_json(webpage, video_id) def _extract_formats(self, video_info): media_info = traverse_obj(video_info, ('page_info', 'media_info')) @@ -189,7 +192,8 @@ class WeiboIE(WeiboBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - meta = 
self._weibo_download_json(f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id) + meta = self._weibo_download_json( + 'https://weibo.com/ajax/statuses/show', video_id, query={'id': video_id}) mix_media_info = traverse_obj(meta, ('mix_media_info', 'items', ...)) if not mix_media_info: return self._parse_video_info(meta) @@ -205,7 +209,7 @@ class WeiboIE(WeiboBaseIE): class WeiboVideoIE(WeiboBaseIE): - _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)' + _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:(?:[\da-f]{32}|\d{16,}))' _TESTS = [{ 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow', 'info_dict': { @@ -227,6 +231,27 @@ class WeiboVideoIE(WeiboBaseIE): 'repost_count': int, '_old_archive_ids': ['weibomobile 4797700463137878'], }, + }, { + 'url': 'https://weibo.com/tv/show/1034:633c288cc043d0ca7808030f1157da64', + 'info_dict': { + 'id': '4189191225395228', + 'ext': 'mp4', + 'display_id': 'FBqgOmDxO', + 'title': '柴犬柴犬的秒拍视频', + 'alt_title': '柴犬柴犬的秒拍视频', + 'description': '午睡当然是要甜甜蜜蜜的啦![坏笑] Instagram:shibainu.gaku http://t.cn/RHbmjzW \u200B\u200B\u200B', + 'uploader': '柴犬柴犬', + 'uploader_id': '5926682210', + 'uploader_url': 'https://weibo.com/u/5926682210', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'duration': 53, + 'thumbnail': 'https://wx1.sinaimg.cn/large/006t5KMygy1fmu31fsqbej30hs0hstav.jpg', + 'timestamp': 1514264429, + 'upload_date': '20171226', + '_old_archive_ids': ['weibomobile 4189191225395228'], + }, }] def _real_extract(self, url): @@ -234,8 +259,8 @@ class WeiboVideoIE(WeiboBaseIE): post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode() video_info = self._weibo_download_json( - f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}', - video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo'] + 'https://weibo.com/tv/api/component', video_id, data=post_data, headers={'Referer': 
url}, + query={'page': f'/tv/show/{video_id}'})['data']['Component_Play_Playinfo'] return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)