|
|
@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
|
|
|
from ..swfinterp import SWFInterpreter
|
|
|
|
from ..swfinterp import SWFInterpreter
|
|
|
|
from ..compat import (
|
|
|
|
from ..compat import (
|
|
|
|
compat_chr,
|
|
|
|
compat_chr,
|
|
|
|
|
|
|
|
compat_HTTPError,
|
|
|
|
compat_kwargs,
|
|
|
|
compat_kwargs,
|
|
|
|
compat_parse_qs,
|
|
|
|
compat_parse_qs,
|
|
|
|
compat_urllib_parse_unquote,
|
|
|
|
compat_urllib_parse_unquote,
|
|
|
@ -64,7 +65,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
|
|
|
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
|
|
|
|
|
|
|
|
|
|
|
_RESERVED_NAMES = (
|
|
|
|
_RESERVED_NAMES = (
|
|
|
|
r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|'
|
|
|
|
r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
|
|
|
|
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
|
|
|
|
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
|
|
|
|
r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
|
|
|
|
r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
|
|
|
|
|
|
|
|
|
|
|
@ -303,6 +304,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
|
|
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
|
|
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
|
|
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
|
|
|
|
|
|
|
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
|
|
|
|
|
|
|
|
|
|
|
def _call_api(self, ep, query, video_id):
|
|
|
|
def _call_api(self, ep, query, video_id):
|
|
|
|
data = self._DEFAULT_API_DATA.copy()
|
|
|
|
data = self._DEFAULT_API_DATA.copy()
|
|
|
@ -320,7 +322,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
def _extract_yt_initial_data(self, video_id, webpage):
|
|
|
|
def _extract_yt_initial_data(self, video_id, webpage):
|
|
|
|
return self._parse_json(
|
|
|
|
return self._parse_json(
|
|
|
|
self._search_regex(
|
|
|
|
self._search_regex(
|
|
|
|
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
|
|
|
|
(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
|
|
|
|
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
|
|
|
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
|
|
|
video_id)
|
|
|
|
video_id)
|
|
|
|
|
|
|
|
|
|
|
@ -345,7 +347,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
|
|
|
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
|
|
|
(?:(?:www|dev)\.)?invidio\.us/|
|
|
|
|
(?:(?:www|dev)\.)?invidio\.us/|
|
|
|
|
(?:(?:www|no)\.)?invidiou\.sh/|
|
|
|
|
(?:(?:www|no)\.)?invidiou\.sh/|
|
|
|
|
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
|
|
|
|
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
|
|
|
(?:www\.)?invidious\.kabi\.tk/|
|
|
|
|
(?:www\.)?invidious\.kabi\.tk/|
|
|
|
|
(?:www\.)?invidious\.13ad\.de/|
|
|
|
|
(?:www\.)?invidious\.13ad\.de/|
|
|
|
|
(?:www\.)?invidious\.mastodon\.host/|
|
|
|
|
(?:www\.)?invidious\.mastodon\.host/|
|
|
|
@ -1120,6 +1122,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
'skip_download': True,
|
|
|
|
'skip_download': True,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
# another example of '};' in ytInitialData
|
|
|
|
|
|
|
|
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
|
|
|
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
|
|
|
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
|
|
|
},
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
@ -1779,7 +1790,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
if not video_info and not player_response:
|
|
|
|
if not video_info and not player_response:
|
|
|
|
player_response = extract_player_response(
|
|
|
|
player_response = extract_player_response(
|
|
|
|
self._search_regex(
|
|
|
|
self._search_regex(
|
|
|
|
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
|
|
|
(r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
|
|
|
|
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
|
|
|
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
|
|
|
'initial player response', default='{}'),
|
|
|
|
'initial player response', default='{}'),
|
|
|
|
video_id)
|
|
|
|
video_id)
|
|
|
@ -2830,6 +2841,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
'only_matching': True,
|
|
|
|
'only_matching': True,
|
|
|
|
}]
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
|
|
|
def suitable(cls, url):
|
|
|
|
|
|
|
|
return False if YoutubeIE.suitable(url) else super(
|
|
|
|
|
|
|
|
YoutubeTabIE, cls).suitable(url)
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_channel_id(self, webpage):
|
|
|
|
def _extract_channel_id(self, webpage):
|
|
|
|
channel_id = self._html_search_meta(
|
|
|
|
channel_id = self._html_search_meta(
|
|
|
|
'channelId', webpage, 'channel id', default=None)
|
|
|
|
'channelId', webpage, 'channel id', default=None)
|
|
|
@ -3143,10 +3159,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
for page_num in itertools.count(1):
|
|
|
|
for page_num in itertools.count(1):
|
|
|
|
if not continuation:
|
|
|
|
if not continuation:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
browse = self._download_json(
|
|
|
|
count = 0
|
|
|
|
'https://www.youtube.com/browse_ajax', None,
|
|
|
|
retries = 3
|
|
|
|
'Downloading page %d' % page_num,
|
|
|
|
while count <= retries:
|
|
|
|
headers=headers, query=continuation, fatal=False)
|
|
|
|
try:
|
|
|
|
|
|
|
|
# Downloading page may result in intermittent 5xx HTTP error
|
|
|
|
|
|
|
|
# that is usually worked around with a retry
|
|
|
|
|
|
|
|
browse = self._download_json(
|
|
|
|
|
|
|
|
'https://www.youtube.com/browse_ajax', None,
|
|
|
|
|
|
|
|
'Downloading page %d%s'
|
|
|
|
|
|
|
|
% (page_num, ' (retry #%d)' % count if count else ''),
|
|
|
|
|
|
|
|
headers=headers, query=continuation)
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
except ExtractorError as e:
|
|
|
|
|
|
|
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
|
|
|
|
|
|
|
count += 1
|
|
|
|
|
|
|
|
if count <= retries:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
raise
|
|
|
|
if not browse:
|
|
|
|
if not browse:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
response = try_get(browse, lambda x: x[1]['response'], dict)
|
|
|
|
response = try_get(browse, lambda x: x[1]['response'], dict)
|
|
|
|