diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py index cbe0c3228..3cb4d71a6 100644 --- a/youtube_dl/extractor/weibo.py +++ b/youtube_dl/extractor/weibo.py @@ -8,7 +8,8 @@ import random import re from ..compat import ( - compat_urlparse, + compat_parse_qs, + compat_str, ) from ..utils import ( js_to_json, @@ -31,70 +32,71 @@ class WeiboIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) # to get Referer url for genvisitor - webpage, urlh = self._download_webpage_handle(url, video_id, note="first visit the page") + webpage, urlh = self._download_webpage_handle(url, video_id) visitor_url = urlh.geturl() - headers = { - 'Referer': visitor_url - } - fp = { - "os": "2", - "browser": "Gecko57,0,0,0", - "fonts": "undefined", - "screenInfo": "1440*900*24", - "plugins": "" - } - data = urlencode_postdata({ - "cb": "gen_callback", - "fp": json.dumps(fp), - }) - - genvisitor_url = 'https://passport.weibo.com/visitor/genvisitor' - webpage = self._download_webpage(genvisitor_url, video_id, data=data, headers=headers, note="gen visitor") - - p = strip_jsonp(webpage) - i1 = p.find('{') - i2 = p.rfind('}') - j = p[i1:i2 + 1] # get JSON object - d = json.loads(j) - tid = d["data"]["tid"] - cnfd = "%03d" % d["data"]["confidence"] - - query = { - 'a': 'incarnate', - 't': tid, - 'w': 2, - 'c': cnfd, - 'cb': 'cross_domain', - 'from': 'weibo', - '_rand': random.random() - } - gencallback_url = "https://passport.weibo.com/visitor/visitor" - self._download_webpage(gencallback_url, video_id, note="gen callback", query=query) + if 'passport.weibo.com' in visitor_url: + # first visit + visitor_data = self._download_json( + 'https://passport.weibo.com/visitor/genvisitor', video_id, + note='Generating first-visit data', + transform_source=strip_jsonp, + headers={'Referer': visitor_url}, + data=urlencode_postdata({ + 'cb': 'gen_callback', + 'fp': json.dumps({ + 'os': '2', + 'browser': 'Gecko57,0,0,0', + 'fonts': 'undefined', + 'screenInfo': '1440*900*24', + 'plugins': '', + }), + })) + + tid = visitor_data['data']['tid'] + cnfd = '%03d' % visitor_data['data']['confidence'] + + self._download_webpage( + 'https://passport.weibo.com/visitor/visitor', video_id, + note='Running first-visit callback', + query={ + 'a': 'incarnate', + 't': tid, + 'w': 2, + 'c': cnfd, + 'cb': 'cross_domain', + 'from': 'weibo', + '_rand': random.random(), + }) + + webpage = self._download_webpage( + url, video_id, note='Revisiting webpage') + + title = self._html_search_regex( + r'