|
|
@ -384,6 +384,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
def _real_initialize(self):
|
|
|
|
def _real_initialize(self):
|
|
|
|
self._initialize_pref()
|
|
|
|
self._initialize_pref()
|
|
|
|
self._initialize_consent()
|
|
|
|
self._initialize_consent()
|
|
|
|
|
|
|
|
self._check_login_required()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _check_login_required(self):
|
|
|
|
if (self._LOGIN_REQUIRED
|
|
|
|
if (self._LOGIN_REQUIRED
|
|
|
|
and self.get_param('cookiefile') is None
|
|
|
|
and self.get_param('cookiefile') is None
|
|
|
|
and self.get_param('cookiesfrombrowser') is None):
|
|
|
|
and self.get_param('cookiesfrombrowser') is None):
|
|
|
@ -563,6 +566,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
headers['X-Origin'] = origin
|
|
|
|
headers['X-Origin'] = origin
|
|
|
|
return {h: v for h, v in headers.items() if v is not None}
|
|
|
|
return {h: v for h, v in headers.items() if v is not None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _download_ytcfg(self, client, video_id):
    """Download the landing page of a web-based client and return its ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a page to fetch; for any
    other client no request is made and an empty dict is returned.
    The page download is non-fatal.

    @param client    client name used to pick the page URL
    @param video_id  interpolated into the embed URL and forwarded to the
                     download and ytcfg-extraction helpers
    @returns         whatever extract_ytcfg() yields, or {} if that is falsy
    """
    client_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    try:
        page_url = client_pages[client]
    except KeyError:
        # No webpage is associated with this client
        return {}

    # Underscores in the client name are shown as spaces in the progress note
    client_label = client.replace("_", " ").strip()
    page = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    extracted = self.extract_ytcfg(video_id, page)
    return extracted if extracted else {}
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
@staticmethod
|
|
|
|
def _build_api_continuation_query(continuation, ctp=None):
|
|
|
|
def _build_api_continuation_query(continuation, ctp=None):
|
|
|
|
query = {
|
|
|
|
query = {
|
|
|
@ -728,6 +743,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_time_text(self, renderer, *path_list):
|
|
|
|
def _extract_time_text(self, renderer, *path_list):
|
|
|
|
|
|
|
|
"""@returns (timestamp, time_text)"""
|
|
|
|
text = self._get_text(renderer, *path_list) or ''
|
|
|
|
text = self._get_text(renderer, *path_list) or ''
|
|
|
|
dt = self.extract_relative_time(text)
|
|
|
|
dt = self.extract_relative_time(text)
|
|
|
|
timestamp = None
|
|
|
|
timestamp = None
|
|
|
@ -2959,16 +2975,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
return orderedSet(requested_clients)
|
|
|
|
return orderedSet(requested_clients)
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_player_ytcfg(self, client, video_id):
    """Return the ytcfg found on the page of a 'web_music' or 'web_embedded' client.

    Any other client has no page listed here, so nothing is downloaded and
    an empty dict is returned. The page download is non-fatal.

    @param client    client name used to pick the page URL
    @param video_id  interpolated into the embed URL and forwarded to the
                     download and ytcfg-extraction helpers
    @returns         whatever extract_ytcfg() yields, or {} if that is falsy
    """
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    target = config_pages.get(client)
    if target:
        # Underscores in the client name are shown as spaces in the progress note
        readable_client = client.replace('_', ' ').strip()
        html = self._download_webpage(
            target, video_id, fatal=False, note=f'Downloading {readable_client} config')
        return self.extract_ytcfg(video_id, html) or {}
    return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
|
|
|
|
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
|
|
|
|
initial_pr = None
|
|
|
|
initial_pr = None
|
|
|
|
if webpage:
|
|
|
|
if webpage:
|
|
|
@ -3005,8 +3011,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
while clients:
|
|
|
|
while clients:
|
|
|
|
client, base_client, variant = _split_innertube_client(clients.pop())
|
|
|
|
client, base_client, variant = _split_innertube_client(clients.pop())
|
|
|
|
player_ytcfg = master_ytcfg if client == 'web' else {}
|
|
|
|
player_ytcfg = master_ytcfg if client == 'web' else {}
|
|
|
|
if 'configs' not in self._configuration_arg('player_skip'):
|
|
|
|
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
|
|
|
|
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
|
|
|
|
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
|
|
|
|
|
|
|
|
|
|
|
|
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
|
|
|
|
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
|
|
|
|
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
|
|
|
|
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
|
|
|
@ -4347,6 +4353,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|
|
|
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
|
|
|
|
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
|
|
|
|
note='Downloading API JSON with unavailable videos')
|
|
|
|
note='Downloading API JSON with unavailable videos')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@property
def skip_webpage(self):
    """True when 'webpage' is among the `skip` extractor arguments.

    The arguments are always looked up under YoutubeTabIE's ie_key, not
    under the key of whichever extractor is reading the property.
    """
    skip_args = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skip_args
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_webpage(self, url, item_id, fatal=True):
|
|
|
|
def _extract_webpage(self, url, item_id, fatal=True):
|
|
|
|
retries = self.get_param('extractor_retries', 3)
|
|
|
|
retries = self.get_param('extractor_retries', 3)
|
|
|
|
count = -1
|
|
|
|
count = -1
|
|
|
@ -4393,9 +4403,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
return webpage, data
|
|
|
|
return webpage, data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Complain when an authenticated session has no ytcfg to work with.

    Intended for callers that could not extract ytcfg (and data) from the
    initial webpage. Does nothing if `ytcfg` is truthy or the session is
    not authenticated.

    @param ytcfg  the ytcfg obtained so far (may be empty/None)
    @param fatal  if true and 'authcheck' is not in the YoutubeTab `skip`
                  extractor args, raise instead of only warning
    @raises ExtractorError (expected=True) in the fatal case described above
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_checks and fatal:
        hint = (
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
        raise ExtractorError(hint, expected=True)
    # Non-fatal, or the check was explicitly skipped: warn only
    self.report_warning(msg, only_once=True)
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
|
|
|
|
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
|
|
|
|
data = None
|
|
|
|
data = None
|
|
|
|
if 'webpage' not in self._configuration_arg('skip'):
|
|
|
|
if not self.skip_webpage:
|
|
|
|
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
|
|
|
|
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
|
|
|
|
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
|
|
|
|
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
|
|
|
|
# Reject webpage data if redirected to home page without explicitly requesting
|
|
|
|
# Reject webpage data if redirected to home page without explicitly requesting
|
|
|
@ -4409,14 +4431,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|
|
|
raise ExtractorError(msg, expected=True)
|
|
|
|
raise ExtractorError(msg, expected=True)
|
|
|
|
self.report_warning(msg, only_once=True)
|
|
|
|
self.report_warning(msg, only_once=True)
|
|
|
|
if not data:
|
|
|
|
if not data:
|
|
|
|
if not ytcfg and self.is_authenticated:
|
|
|
|
self._report_playlist_authcheck(ytcfg, fatal=fatal)
|
|
|
|
msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
|
|
|
|
|
|
|
|
if 'authcheck' not in self._configuration_arg('skip') and fatal:
|
|
|
|
|
|
|
|
raise ExtractorError(
|
|
|
|
|
|
|
|
msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
|
|
|
|
|
|
|
|
' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
|
|
|
|
|
|
|
|
expected=True)
|
|
|
|
|
|
|
|
self.report_warning(msg, only_once=True)
|
|
|
|
|
|
|
|
data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
|
|
|
|
data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
|
|
|
|
return data, ytcfg
|
|
|
|
return data, ytcfg
|
|
|
|
|
|
|
|
|
|
|
@ -4454,14 +4469,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|
|
|
('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
|
|
|
|
('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
|
|
|
|
('continuationContents', ),
|
|
|
|
('continuationContents', ),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
display_id = f'query "{query}"'
|
|
|
|
check_get_keys = tuple(set(keys[0] for keys in content_keys))
|
|
|
|
check_get_keys = tuple(set(keys[0] for keys in content_keys))
|
|
|
|
|
|
|
|
ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
|
|
|
|
|
|
|
|
self._report_playlist_authcheck(ytcfg, fatal=False)
|
|
|
|
|
|
|
|
|
|
|
|
continuation_list = [None]
|
|
|
|
continuation_list = [None]
|
|
|
|
|
|
|
|
search = None
|
|
|
|
for page_num in itertools.count(1):
|
|
|
|
for page_num in itertools.count(1):
|
|
|
|
data.update(continuation_list[0] or {})
|
|
|
|
data.update(continuation_list[0] or {})
|
|
|
|
|
|
|
|
headers = self.generate_api_headers(
|
|
|
|
|
|
|
|
ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
|
|
|
|
search = self._extract_response(
|
|
|
|
search = self._extract_response(
|
|
|
|
item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
|
|
|
|
item_id=f'{display_id} page {page_num}', ep='search', query=data,
|
|
|
|
default_client=default_client, check_get_keys=check_get_keys)
|
|
|
|
default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
|
|
|
|
slr_contents = traverse_obj(search, *content_keys)
|
|
|
|
slr_contents = traverse_obj(search, *content_keys)
|
|
|
|
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
|
|
|
|
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
|
|
|
|
if not continuation_list[0]:
|
|
|
|
if not continuation_list[0]:
|
|
|
@ -5634,7 +5655,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor):
|
|
|
|
Subclasses must define the _FEED_NAME property.
|
|
|
|
Subclasses must define the _FEED_NAME property.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
_LOGIN_REQUIRED = True
|
|
|
|
_LOGIN_REQUIRED = True
|
|
|
|
_TESTS = []
|
|
|
|
|
|
|
|
|
|
|
|
def _real_initialize(self):
    """Run YoutubeBaseInfoExtractor's login-requirement check on this extractor."""
    # Invoked through the class with an explicit `self`: the hunk header shows
    # this class deriving from InfoExtractor, not YoutubeBaseInfoExtractor.
    check_login = YoutubeBaseInfoExtractor._check_login_required
    check_login(self)
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
@property
|
|
|
|
def IE_NAME(self):
|
|
|
|
def IE_NAME(self):
|
|
|
|