#86 [youtube_live_chat] Use POST API (Closes #82)

YouTube has removed support for the old GET-based live chat API, which now returns 404.
Authored by siikamiika
5 years ago · 273762c8d0
parent 7620cd46c3
commit 273762c8d0
3 changed files with 45 additions and 38 deletions
--- a/youtube_dlc/downloader/fragment.py
+++ b/youtube_dlc/downloader/fragment.py
@@ -95,11 +95,12 @@ class FragmentFD(FileDownloader):
        frag_index_stream.write(json.dumps({'downloader': downloader}))
        frag_index_stream.close()
-    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+    def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        fragment_info_dict = {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
            'request_data': request_data,
        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
--- a/youtube_dlc/downloader/http.py
+++ b/youtube_dlc/downloader/http.py
@@ -27,6 +27,7 @@ from ..utils import (
 class HttpFD(FileDownloader):
    def real_download(self, filename, info_dict):
        url = info_dict['url']
        request_data = info_dict.get('request_data', None)
        class DownloadContext(dict):
            __getattr__ = dict.get
@@ -101,7 +102,7 @@ class HttpFD(FileDownloader):
                range_end = ctx.data_len - 1
            has_range = range_start is not None
            ctx.has_range = has_range
-            request = sanitized_Request(url, None, headers)
+            request = sanitized_Request(url, request_data, headers)
            if has_range:
                set_range(request, range_start, range_end)
            # Establish connection
@@ -152,7 +153,7 @@ class HttpFD(FileDownloader):
                    try:
                        # Open the connection again without the range header
                        ctx.data = self.ydl.urlopen(
-                            sanitized_Request(url, None, headers))
+                            sanitized_Request(url, request_data, headers))
                        content_length = ctx.data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
--- a/youtube_dlc/downloader/youtube_live_chat.py
+++ b/youtube_dlc/downloader/youtube_live_chat.py
@@ -1,11 +1,13 @@
 from __future__ import division, unicode_literals
 import re
 import json
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import try_get
+from ..utils import (
    try_get,
    RegexNotFoundError,
 )
 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
@@ -27,40 +29,28 @@ class YoutubeLiveChatReplayFD(FragmentFD):
            'total_frags': None,
        }
-        def dl_fragment(url):
+        ie = YT_BaseIE(self.ydl)
            headers = info_dict.get('http_headers', {})
            return self._download_fragment(ctx, url, info_dict, headers)
-        def parse_yt_initial_data(data):
+        def dl_fragment(url, data=None, headers=None):
-            patterns = (
+            http_headers = info_dict.get('http_headers', {})
-                r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
+            if headers:
-                r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
+                http_headers = http_headers.copy()
-            data = data.decode('utf-8', 'replace')
+                http_headers.update(headers)
-            for patt in patterns:
+            return self._download_fragment(ctx, url, info_dict, http_headers, data)
                try:
                    raw_json = re.search(patt, data).group(1)
                    return json.loads(raw_json)
                except AttributeError:
                    continue
-        def download_and_parse_fragment(url, frag_index):
+        def download_and_parse_fragment(url, frag_index, request_data):
            count = 0
            while count <= fragment_retries:
                try:
-                    success, raw_fragment = dl_fragment(url)
+                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
                    if not success:
                        return False, None, None
-                    data = parse_yt_initial_data(raw_fragment)
+                    try:
                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
-                        raw_data = json.loads(raw_fragment)
+                        data = json.loads(raw_fragment)
                        # sometimes youtube replies with a list
                        if not isinstance(raw_data, list):
                            raw_data = [raw_data]
                        try:
                            data = next(item['response'] for item in raw_data if 'response' in item)
                        except StopIteration:
                            data = {}
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@@ -93,22 +83,37 @@ class YoutubeLiveChatReplayFD(FragmentFD):
            'https://www.youtube.com/watch?v={}'.format(video_id))
        if not success:
            return False
-        data = parse_yt_initial_data(raw_fragment)
+        try:
            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')
        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
        frag_index = offset = 0
        while continuation_id is not None:
            frag_index += 1
-            url = ''.join((
+            request_data = {
-                'https://www.youtube.com/live_chat_replay',
+                'context': innertube_context,
-                '/get_live_chat_replay' if frag_index > 1 else '',
+                'continuation': continuation_id,
-                '?continuation=%s' % continuation_id,
+            }
-                '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else ''))
+            if frag_index > 1:
-            success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
+                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
            success, continuation_id, offset = download_and_parse_fragment(
                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
            if not success:
                return False
            if test: