|
|
@ -53,6 +53,10 @@ from ..utils import (
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_qs(url):
    """Return the decoded query-string parameters of *url*.

    The URL is split with ``compat_urlparse.urlparse`` and only its
    ``query`` component is handed to ``compat_urlparse.parse_qs``.  Callers
    in this module read the result as ``qs.get(name, [None])[0]``, i.e. as a
    mapping from parameter name to a list of values.
    """
    query = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
|
"""Provide base functions for Youtube extractors"""
|
|
|
|
"""Provide base functions for Youtube extractors"""
|
|
|
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
|
|
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
|
|
@ -438,14 +442,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
r'(?:(?:www|dev)\.)?invidio\.us',
|
|
|
|
r'(?:(?:www|dev)\.)?invidio\.us',
|
|
|
|
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
|
|
|
|
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
|
|
|
|
r'(?:www\.)?invidious\.pussthecat\.org',
|
|
|
|
r'(?:www\.)?invidious\.pussthecat\.org',
|
|
|
|
r'(?:www\.)?invidious\.048596\.xyz',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.zee\.li',
|
|
|
|
r'(?:www\.)?invidious\.zee\.li',
|
|
|
|
r'(?:www\.)?vid\.puffyan\.us',
|
|
|
|
|
|
|
|
r'(?:(?:www|au)\.)?ytprivate\.com',
|
|
|
|
r'(?:(?:www|au)\.)?ytprivate\.com',
|
|
|
|
r'(?:www\.)?invidious\.namazso\.eu',
|
|
|
|
r'(?:www\.)?invidious\.namazso\.eu',
|
|
|
|
r'(?:www\.)?invidious\.ethibox\.fr',
|
|
|
|
r'(?:www\.)?invidious\.ethibox\.fr',
|
|
|
|
r'(?:www\.)?inv\.skyn3t\.in',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.himiko\.cloud',
|
|
|
|
|
|
|
|
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
|
|
|
|
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
|
|
|
|
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
|
|
|
|
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
|
|
|
|
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
|
|
|
|
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
|
|
|
@ -454,25 +454,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
r'(?:(?:www|no)\.)?invidiou\.sh',
|
|
|
|
r'(?:(?:www|no)\.)?invidiou\.sh',
|
|
|
|
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
|
|
|
|
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
|
|
|
|
r'(?:www\.)?invidious\.kabi\.tk',
|
|
|
|
r'(?:www\.)?invidious\.kabi\.tk',
|
|
|
|
r'(?:www\.)?invidious\.13ad\.de',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.mastodon\.host',
|
|
|
|
r'(?:www\.)?invidious\.mastodon\.host',
|
|
|
|
r'(?:www\.)?invidious\.zapashcanon\.fr',
|
|
|
|
r'(?:www\.)?invidious\.zapashcanon\.fr',
|
|
|
|
r'(?:www\.)?invidious\.kavin\.rocks',
|
|
|
|
r'(?:www\.)?invidious\.kavin\.rocks',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.tinfoil-hat\.net',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.himiko\.cloud',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.reallyancient\.tech',
|
|
|
|
r'(?:www\.)?invidious\.tube',
|
|
|
|
r'(?:www\.)?invidious\.tube',
|
|
|
|
r'(?:www\.)?invidiou\.site',
|
|
|
|
r'(?:www\.)?invidiou\.site',
|
|
|
|
r'(?:www\.)?invidious\.site',
|
|
|
|
r'(?:www\.)?invidious\.site',
|
|
|
|
r'(?:www\.)?invidious\.xyz',
|
|
|
|
r'(?:www\.)?invidious\.xyz',
|
|
|
|
r'(?:www\.)?invidious\.nixnet\.xyz',
|
|
|
|
r'(?:www\.)?invidious\.nixnet\.xyz',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.048596\.xyz',
|
|
|
|
r'(?:www\.)?invidious\.drycat\.fr',
|
|
|
|
r'(?:www\.)?invidious\.drycat\.fr',
|
|
|
|
|
|
|
|
r'(?:www\.)?inv\.skyn3t\.in',
|
|
|
|
r'(?:www\.)?tube\.poal\.co',
|
|
|
|
r'(?:www\.)?tube\.poal\.co',
|
|
|
|
r'(?:www\.)?tube\.connect\.cafe',
|
|
|
|
r'(?:www\.)?tube\.connect\.cafe',
|
|
|
|
r'(?:www\.)?vid\.wxzm\.sx',
|
|
|
|
r'(?:www\.)?vid\.wxzm\.sx',
|
|
|
|
r'(?:www\.)?vid\.mint\.lgbt',
|
|
|
|
r'(?:www\.)?vid\.mint\.lgbt',
|
|
|
|
|
|
|
|
r'(?:www\.)?vid\.puffyan\.us',
|
|
|
|
r'(?:www\.)?yewtu\.be',
|
|
|
|
r'(?:www\.)?yewtu\.be',
|
|
|
|
r'(?:www\.)?yt\.elukerio\.org',
|
|
|
|
r'(?:www\.)?yt\.elukerio\.org',
|
|
|
|
r'(?:www\.)?yt\.lelux\.fi',
|
|
|
|
r'(?:www\.)?yt\.lelux\.fi',
|
|
|
|
r'(?:www\.)?invidious\.ggc-project\.de',
|
|
|
|
r'(?:www\.)?invidious\.ggc-project\.de',
|
|
|
|
r'(?:www\.)?yt\.maisputain\.ovh',
|
|
|
|
r'(?:www\.)?yt\.maisputain\.ovh',
|
|
|
|
|
|
|
|
r'(?:www\.)?ytprivate\.com',
|
|
|
|
|
|
|
|
r'(?:www\.)?invidious\.13ad\.de',
|
|
|
|
r'(?:www\.)?invidious\.toot\.koeln',
|
|
|
|
r'(?:www\.)?invidious\.toot\.koeln',
|
|
|
|
r'(?:www\.)?invidious\.fdn\.fr',
|
|
|
|
r'(?:www\.)?invidious\.fdn\.fr',
|
|
|
|
r'(?:www\.)?watch\.nettohikari\.com',
|
|
|
|
r'(?:www\.)?watch\.nettohikari\.com',
|
|
|
@ -516,15 +523,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)? # all until now is optional -> you can pass the naked ID
|
|
|
|
)? # all until now is optional -> you can pass the naked ID
|
|
|
|
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
|
|
|
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
|
|
|
(?!.*?\blist=
|
|
|
|
|
|
|
|
(?:
|
|
|
|
|
|
|
|
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
|
|
|
|
|
|
|
|
WL # WL are handled by the watch later IE
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
(?(1).+)? # if we found the ID, everything can follow
|
|
|
|
(?(1).+)? # if we found the ID, everything can follow
|
|
|
|
$""" % {
|
|
|
|
$""" % {
|
|
|
|
'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
|
|
|
|
|
|
|
|
'invidious': '|'.join(_INVIDIOUS_SITES),
|
|
|
|
'invidious': '|'.join(_INVIDIOUS_SITES),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_PLAYER_INFO_RE = (
|
|
|
|
_PLAYER_INFO_RE = (
|
|
|
@ -1009,6 +1009,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
},
|
|
|
|
},
|
|
|
|
'skip': 'This video does not exist.',
|
|
|
|
'skip': 'This video does not exist.',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
# Video with incomplete 'yt:stretch=16:'
|
|
|
|
|
|
|
|
'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
|
|
|
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
# Video licensed under Creative Commons
|
|
|
|
# Video licensed under Creative Commons
|
|
|
|
'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
|
|
|
|
'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
|
|
|
@ -1304,6 +1309,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
},
|
|
|
|
},
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected up front; every other URL is decided by the parent class's
    ``suitable``.
    """
    qs = parse_qs(url)
    # Only the first ``list`` value is inspected; a missing or empty value
    # falls through to the regular check.
    if qs.get('list', [None])[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
|
super(YoutubeIE, self).__init__(*args, **kwargs)
|
|
|
|
super(YoutubeIE, self).__init__(*args, **kwargs)
|
|
|
|
self._code_cache = {}
|
|
|
|
self._code_cache = {}
|
|
|
@ -2079,15 +2091,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
|
|
|
|
for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
|
|
|
|
for keyword in keywords:
|
|
|
|
for keyword in keywords:
|
|
|
|
if keyword.startswith('yt:stretch='):
|
|
|
|
if keyword.startswith('yt:stretch='):
|
|
|
|
stretch_ratio = map(
|
|
|
|
mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
|
|
|
|
lambda x: int_or_none(x, default=0),
|
|
|
|
if mobj:
|
|
|
|
keyword.split('=')[1].split(':'))
|
|
|
|
# NB: float is intentional for forcing float division
|
|
|
|
w, h = (list(stretch_ratio) + [0])[:2]
|
|
|
|
w, h = (float(v) for v in mobj.groups())
|
|
|
|
if w > 0 and h > 0:
|
|
|
|
if w > 0 and h > 0:
|
|
|
|
ratio = w / h
|
|
|
|
ratio = w / h
|
|
|
|
for f in formats:
|
|
|
|
for f in formats:
|
|
|
|
if f.get('vcodec') != 'none':
|
|
|
|
if f.get('vcodec') != 'none':
|
|
|
|
f['stretched_ratio'] = ratio
|
|
|
|
f['stretched_ratio'] = ratio
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
thumbnails = []
|
|
|
|
thumbnails = []
|
|
|
|
for container in (video_details, microformat):
|
|
|
|
for container in (video_details, microformat):
|
|
|
@ -2484,6 +2497,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
|
|
|
'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
|
|
|
'uploader': 'Игорь Клейнер',
|
|
|
|
'uploader': 'Игорь Клейнер',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
}, {
|
|
|
|
|
|
|
|
# playlists, series
|
|
|
|
|
|
|
|
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
|
|
|
|
|
|
|
|
'playlist_mincount': 5,
|
|
|
|
|
|
|
|
'info_dict': {
|
|
|
|
|
|
|
|
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
|
|
|
|
|
|
|
'title': '3Blue1Brown - Playlists',
|
|
|
|
|
|
|
|
'description': 'md5:e1384e8a133307dd10edee76e875d62f',
|
|
|
|
|
|
|
|
},
|
|
|
|
}, {
|
|
|
|
}, {
|
|
|
|
# playlists, singlepage
|
|
|
|
# playlists, singlepage
|
|
|
|
'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
|
|
|
|
'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
|
|
|
@ -2790,6 +2812,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
'title': '#cctv9',
|
|
|
|
'title': '#cctv9',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
'playlist_mincount': 350,
|
|
|
|
'playlist_mincount': 350,
|
|
|
|
|
|
|
|
}, {
|
|
|
|
|
|
|
|
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
|
|
|
|
|
|
|
|
'only_matching': True,
|
|
|
|
}]
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
@classmethod
|
|
|
@ -2813,13 +2838,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first recognised renderer dict contained in *item*.

    *item* is iterated with ``item.items()``.  Values that are not dicts
    are skipped.  A dict value is returned as soon as its key is one of the
    known basic renderer names or has the form ``grid...Renderer``.
    ``None`` is returned when no entry qualifies.
    """
    # Modified from _extract_grid_item_renderer
    known_basic_renderers = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer',
    )
    for key, renderer in item.items():
        if not isinstance(renderer, dict):
            continue
        if key in known_basic_renderers:
            return renderer
        # Accept any grid variant (gridVideoRenderer, gridShowRenderer, ...)
        # instead of enumerating each one.
        if key.startswith('grid') and key.endswith('Renderer'):
            return renderer
    return None
|
|
|
|
|
|
|
|
|
|
|
|
def _grid_entries(self, grid_renderer):
|
|
|
|
def _grid_entries(self, grid_renderer):
|
|
|
@ -2830,7 +2857,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
if not isinstance(renderer, dict):
|
|
|
|
if not isinstance(renderer, dict):
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
title = try_get(
|
|
|
|
title = try_get(
|
|
|
|
renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
|
|
|
|
renderer, (lambda x: x['title']['runs'][0]['text'],
|
|
|
|
|
|
|
|
lambda x: x['title']['simpleText']), compat_str)
|
|
|
|
# playlist
|
|
|
|
# playlist
|
|
|
|
playlist_id = renderer.get('playlistId')
|
|
|
|
playlist_id = renderer.get('playlistId')
|
|
|
|
if playlist_id:
|
|
|
|
if playlist_id:
|
|
|
@ -2838,10 +2866,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
'https://www.youtube.com/playlist?list=%s' % playlist_id,
|
|
|
|
'https://www.youtube.com/playlist?list=%s' % playlist_id,
|
|
|
|
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
|
|
|
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
|
|
|
video_title=title)
|
|
|
|
video_title=title)
|
|
|
|
|
|
|
|
continue
|
|
|
|
# video
|
|
|
|
# video
|
|
|
|
video_id = renderer.get('videoId')
|
|
|
|
video_id = renderer.get('videoId')
|
|
|
|
if video_id:
|
|
|
|
if video_id:
|
|
|
|
yield self._extract_video(renderer)
|
|
|
|
yield self._extract_video(renderer)
|
|
|
|
|
|
|
|
continue
|
|
|
|
# channel
|
|
|
|
# channel
|
|
|
|
channel_id = renderer.get('channelId')
|
|
|
|
channel_id = renderer.get('channelId')
|
|
|
|
if channel_id:
|
|
|
|
if channel_id:
|
|
|
@ -2850,6 +2880,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
yield self.url_result(
|
|
|
|
yield self.url_result(
|
|
|
|
'https://www.youtube.com/channel/%s' % channel_id,
|
|
|
|
'https://www.youtube.com/channel/%s' % channel_id,
|
|
|
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
|
|
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
# generic endpoint URL support
|
|
|
|
|
|
|
|
ep_url = urljoin('https://www.youtube.com/', try_get(
|
|
|
|
|
|
|
|
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
|
|
|
|
|
|
|
|
compat_str))
|
|
|
|
|
|
|
|
if ep_url:
|
|
|
|
|
|
|
|
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
|
|
|
|
|
|
|
|
if ie.suitable(ep_url):
|
|
|
|
|
|
|
|
yield self.url_result(
|
|
|
|
|
|
|
|
ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
def _shelf_entries_from_content(self, shelf_renderer):
|
|
|
|
def _shelf_entries_from_content(self, shelf_renderer):
|
|
|
|
content = shelf_renderer.get('content')
|
|
|
|
content = shelf_renderer.get('content')
|
|
|
@ -3444,7 +3485,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')
|
|
|
|
url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')
|
|
|
|
|
|
|
|
|
|
|
|
# Handle both video/playlist URLs
|
|
|
|
# Handle both video/playlist URLs
|
|
|
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
|
|
|
qs = parse_qs(url)
|
|
|
|
video_id = qs.get('v', [None])[0]
|
|
|
|
video_id = qs.get('v', [None])[0]
|
|
|
|
playlist_id = qs.get('list', [None])[0]
|
|
|
|
playlist_id = qs.get('list', [None])[0]
|
|
|
|
|
|
|
|
|
|
|
@ -3550,12 +3591,16 @@ class YoutubePlaylistIE(InfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    The URL is rejected when ``YoutubeTabIE.suitable`` accepts it, or when
    its query string carries a non-empty ``v`` parameter.  Otherwise the
    decision is left to the parent class's ``suitable``.
    """
    if YoutubeTabIE.suitable(url):
        return False
    qs = parse_qs(url)
    # Only the first ``v`` value is inspected; a missing or empty value
    # falls through to the regular check.
    if qs.get('v', [None])[0]:
        return False
    return super(YoutubePlaylistIE, cls).suitable(url)
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
def _real_extract(self, url):
|
|
|
|
playlist_id = self._match_id(url)
|
|
|
|
playlist_id = self._match_id(url)
|
|
|
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
|
|
|
qs = parse_qs(url)
|
|
|
|
if not qs:
|
|
|
|
if not qs:
|
|
|
|
qs = {'list': playlist_id}
|
|
|
|
qs = {'list': playlist_id}
|
|
|
|
return self.url_result(
|
|
|
|
return self.url_result(
|
|
|
|