From 8d0ea5f95532bfe940d6cc74520b449af3334e46 Mon Sep 17 00:00:00 2001 From: Bepis <36346617+bbepis@users.noreply.github.com> Date: Wed, 30 Dec 2020 19:25:24 +1100 Subject: [PATCH] [Youtube] Improve comment API requests co-authored by bbepis --- youtube_dlc/extractor/youtube.py | 50 +++++++++++++++----------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 265c29a5a..e5e36075e 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2485,17 +2485,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: ncd = next(search_dict(yt_initial_data, 'nextContinuationData')) - continuations = [(ncd['continuation'], ncd['clickTrackingParams'])] + continuations = [ncd['continuation']] # Handle videos where comments have been disabled entirely except StopIteration: continuations = [] - def get_continuation(continuation, itct, session_token, replies=False): + def get_continuation(continuation, session_token, replies=False): query = { - 'pbj': 1, - 'ctoken': continuation, - 'continuation': continuation, - 'itct': itct, + 'pbj': 1, + 'ctoken': continuation, } if replies: query['action_get_comment_replies'] = 1 @@ -2523,23 +2521,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): response_code = handle.getcode() if (response_code == 200): return self._parse_json(content, video_id) - if (response_code == 413): # Sometimes google makes continuations that are too big to be accepted by themselves. Grade A engineering - # self.to_screen(json.dumps(query)) - # self.to_screen('Google API rate limit detected; waiting 30 seconds before continuing') - # time.sleep(30) - # continue + if (response_code == 413): return None raise ExtractorError('Unexpected HTTP error code: %s' % response_code) first_continuation = True while continuations: continuation, itct = continuations.pop() - comment_response = get_continuation(continuation, itct, xsrf_token) + comment_response = get_continuation(continuation, xsrf_token) if not comment_response: continue if list(search_dict(comment_response, 'externalErrorMessage')): raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage'))) + if 'continuationContents' not in comment_response['response']: + # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?) + continue + # not sure if this actually helps + if 'xsrf_token' in comment_response: + xsrf_token = comment_response['xsrf_token'] + item_section = comment_response['response']['continuationContents']['itemSectionContinuation'] if first_continuation: expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', '')) @@ -2554,7 +2555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_comments.append({ 'id': comment['commentId'], 'text': ''.join([c['text'] for c in comment['contentText']['runs']]), - 'time_text': comment['publishedTimeText']['runs'][0]['text'], + 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]), 'author': comment.get('authorText', {}).get('simpleText', ''), 'votes': comment.get('voteCount', {}).get('simpleText', '0'), 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'], @@ -2563,14 +2564,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'replies' not in meta_comment['commentThreadRenderer']: continue - reply_continuation = meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData'] - continuation = reply_continuation['continuation'] - itct = reply_continuation['clickTrackingParams'] - while True: + reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']] + while reply_continuations: time.sleep(1) - replies_data = get_continuation(continuation, itct, xsrf_token, True) + continuation = reply_continuations.pop() + replies_data = get_continuation(continuation, xsrf_token, True) if not replies_data or 'continuationContents' not in replies_data[1]['response']: - break + continue if self._downloader.params.get('verbose', False): self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count)) @@ -2580,25 +2580,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_comments.append({ 'id': reply_comment['commentId'], 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]), - 'time_text': reply_comment['publishedTimeText']['runs'][0]['text'], + 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]), 'author': reply_comment.get('authorText', {}).get('simpleText', ''), 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'), 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'], 'parent': comment['commentId'] }) if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0: - break + continue - continuation = reply_comment_meta['continuations'][0]['nextContinuationData']['continuation'] - itct = reply_comment_meta['continuations'][0]['nextContinuationData']['clickTrackingParams'] + reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']] self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count)) if 'continuations' in item_section: - new_continuations = [ - (ncd['nextContinuationData']['continuation'], ncd['nextContinuationData']['clickTrackingParams']) - for ncd in item_section['continuations']] - continuations += new_continuations + continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']] time.sleep(1) self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))