[ie/vimeo:album] Support embed-only albums

Authored by: bashonly
pull/14021/head
bashonly 3 days ago
parent aea85d525e
commit 472dcdd6e1
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0

@ -117,13 +117,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _jwt_is_expired(self, token): def _jwt_is_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 120 return jwt_decode_hs256(token)['exp'] - time.time() < 120
def _fetch_viewer_info(self, display_id=None, fatal=True): def _fetch_viewer_info(self, display_id=None):
if self._viewer_info and not self._jwt_is_expired(self._viewer_info['jwt']): if self._viewer_info and not self._jwt_is_expired(self._viewer_info['jwt']):
return self._viewer_info return self._viewer_info
self._viewer_info = self._download_json( self._viewer_info = self._download_json(
'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info', 'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info',
'Failed to download web token info', fatal=fatal, headers={'Accept': 'application/json'}) 'Failed to download web token info', headers={'Accept': 'application/json'})
return self._viewer_info return self._viewer_info
@ -502,6 +502,43 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'quality': 1, 'quality': 1,
} }
@staticmethod
def _get_embed_params(is_embed, referer):
return {
'is_embed': 'true' if is_embed else 'false',
'referrer': urllib.parse.urlparse(referer).hostname if referer and is_embed else '',
}
def _get_album_data_and_hashed_pass(self, album_id, is_embed, referer):
    """Fetch album metadata and, for password-protected albums, the hashed password.

    @returns (album, hashed_pass); hashed_pass is None unless the album's
             privacy view is 'password'
    Raises an expected ExtractorError when a password is required but not
    supplied, or when the auth endpoint answers with HTTP 401.
    """
    viewer_info = self._fetch_viewer_info(album_id)
    api_headers = {
        'Authorization': 'jwt ' + viewer_info['jwt'],
        'Accept': 'application/json',
    }
    api_url = 'https://api.vimeo.com/albums/' + album_id
    api_query = {
        **self._get_embed_params(is_embed, referer),
        'fields': 'description,name,privacy',
    }
    album = self._download_json(api_url, album_id, headers=api_headers, query=api_query)

    # Albums that are not password-protected need no further requests
    if traverse_obj(album, ('privacy', 'view')) != 'password':
        return album, None

    video_password = self.get_param('videopassword')
    if not video_password:
        raise ExtractorError(
            'This album is protected by a password, use the --video-password option',
            expected=True)

    try:
        # The auth endpoint takes the password together with the viewer's xsrf token
        auth_response = self._download_json(
            f'https://vimeo.com/showcase/{album_id}/auth', album_id,
            'Verifying the password',
            data=urlencode_postdata({
                'password': video_password,
                'token': viewer_info['xsrft'],
            }),
            headers={'X-Requested-With': 'XMLHttpRequest'})
        hashed_pass = auth_response['hashed_pass']
    except ExtractorError as error:
        rejected = isinstance(error.cause, HTTPError) and error.cause.status == 401
        if not rejected:
            raise
        # A 401 from the auth endpoint is reported as a wrong password
        raise ExtractorError('Wrong password', expected=True)

    return album, hashed_pass
class VimeoIE(VimeoBaseInfoExtractor): class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
@ -1188,41 +1225,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total'])) info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
return info return info
def _try_album_password(self, url): def _try_album_password(self, url, is_embed, referer):
album_id = self._search_regex( album_id = self._search_regex(
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None) r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
if not album_id: if not album_id:
return return
viewer = self._fetch_viewer_info(album_id, fatal=False) self._get_album_data_and_hashed_pass(album_id, is_embed, referer)
if not viewer:
webpage = self._download_webpage(url, album_id)
viewer = self._parse_json(self._search_regex(
r'bootstrap_data\s*=\s*({.+?})</script>',
webpage, 'bootstrap data'), album_id)['viewer']
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword')
if not password:
raise ExtractorError(
'This album is protected by a password, use the --video-password option',
expected=True)
try:
self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth',
album_id, 'Verifying the password', data=urlencode_postdata({
'password': password,
'token': viewer['xsrft'],
}), headers={
'X-Requested-With': 'XMLHttpRequest',
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise ExtractorError('Wrong password', expected=True)
raise
def _real_extract(self, url): def _real_extract(self, url):
url, data, headers = self._unsmuggle_headers(url) url, data, headers = self._unsmuggle_headers(url)
@ -1238,7 +1246,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
self._try_album_password(url) is_player_url = urllib.parse.urlparse(url).hostname == 'player.vimeo.com'
self._try_album_password(url, is_player_url, headers['Referer'])
is_secure = urllib.parse.urlparse(url).scheme == 'https' is_secure = urllib.parse.urlparse(url).scheme == 'https'
try: try:
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
@ -1265,7 +1274,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
f'{self._downloader._format_err("compromising your security/cookies", "light red")}, ' f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True) f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)
if '://player.vimeo.com/video/' in url: if is_player_url:
config = self._search_json( config = self._search_json(
r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id) r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
if config.get('view') == 4: if config.get('view') == 4:
@ -1531,7 +1540,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
class VimeoAlbumIE(VimeoBaseInfoExtractor): class VimeoAlbumIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:album' IE_NAME = 'vimeo:album'
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))' _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|(?P<is_embed>/embed))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/album/2632481', 'url': 'https://vimeo.com/album/2632481',
@ -1549,12 +1558,54 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
}, },
'playlist_count': 1, 'playlist_count': 1,
'params': {'videopassword': 'youtube-dl'}, 'params': {'videopassword': 'youtube-dl'},
}, {
'note': 'embedded album that requires "referrer" in query (smuggled)',
'url': 'https://vimeo.com/showcase/10677689/embed#__youtubedl_smuggle=%7B%22referer%22%3A+%22https%3A%2F%2Fwww.riccardomutimusic.com%2F%22%7D',
'info_dict': {
'title': 'La Traviata - la serie completa',
'id': '10677689',
},
'playlist': [{
'url': 'https://player.vimeo.com/video/505682113#__youtubedl_smuggle=%7B%22referer%22%3A+%22https%3A%2F%2Fwww.riccardomutimusic.com%2F%22%7D',
'info_dict': {
'id': '505682113',
'ext': 'mp4',
'title': 'La Traviata - Episodio 7',
'uploader': 'RMMusic',
'uploader_id': 'user62556494',
'uploader_url': 'https://vimeo.com/user62556494',
'duration': 3202,
'thumbnail': r're:https?://i\.vimeocdn\.com/video/.+',
},
}],
'params': {
'playlist_items': '1',
'skip_download': 'm3u8',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
'note': 'embedded album that requires "referrer" in query (passed as param)',
'url': 'https://vimeo.com/showcase/10677689/embed',
'info_dict': {
'title': 'La Traviata - la serie completa',
'id': '10677689',
},
'playlist_mincount': 9,
'params': {'http_headers': {'Referer': 'https://www.riccardomutimusic.com/'}},
}, {
'url': 'https://vimeo.com/showcase/11803104/embed2',
'info_dict': {
'title': 'Romans Video Ministry',
'id': '11803104',
},
'playlist_mincount': 41,
}] }]
_PAGE_SIZE = 100 _PAGE_SIZE = 100
def _fetch_page(self, album_id, authorization, hashed_pass, page): def _fetch_page(self, album_id, hashed_pass, is_embed, referer, page):
api_page = page + 1 api_page = page + 1
query = { query = {
**self._get_embed_params(is_embed, referer),
'fields': 'link,uri', 'fields': 'link,uri',
'page': api_page, 'page': api_page,
'per_page': self._PAGE_SIZE, 'per_page': self._PAGE_SIZE,
@ -1565,7 +1616,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
videos = self._download_json( videos = self._download_json(
f'https://api.vimeo.com/albums/{album_id}/videos', f'https://api.vimeo.com/albums/{album_id}/videos',
album_id, f'Downloading page {api_page}', query=query, headers={ album_id, f'Downloading page {api_page}', query=query, headers={
'Authorization': 'jwt ' + authorization, 'Authorization': 'jwt ' + self._fetch_viewer_info(album_id)['jwt'],
'Accept': 'application/json', 'Accept': 'application/json',
})['data'] })['data']
except ExtractorError as e: except ExtractorError as e:
@ -1577,44 +1628,24 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
if not link: if not link:
continue continue
uri = video.get('uri') uri = video.get('uri')
video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None video_id = self._search_regex(r'/videos/(\d+)', uri, 'id', default=None) if uri else None
if is_embed:
if not video_id:
self.report_warning(f'Skipping due to missing video ID: {link}')
continue
link = f'https://player.vimeo.com/video/{video_id}'
if referer:
link = self._smuggle_referrer(link, referer)
yield self.url_result(link, VimeoIE.ie_key(), video_id) yield self.url_result(link, VimeoIE.ie_key(), video_id)
def _real_extract(self, url): def _real_extract(self, url):
album_id = self._match_id(url) url, _, http_headers = self._unsmuggle_headers(url)
viewer = self._fetch_viewer_info(album_id, fatal=False) album_id, is_embed = self._match_valid_url(url).group('id', 'is_embed')
if not viewer: referer = http_headers.get('Referer')
webpage = self._download_webpage(url, album_id) album, hashed_pass = self._get_album_data_and_hashed_pass(album_id, is_embed, referer)
viewer = self._parse_json(self._search_regex(
r'bootstrap_data\s*=\s*({.+?})</script>',
webpage, 'bootstrap data'), album_id)['viewer']
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword')
if not password:
raise ExtractorError(
'This album is protected by a password, use the --video-password option',
expected=True)
try:
hashed_pass = self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth',
album_id, 'Verifying the password', data=urlencode_postdata({
'password': password,
'token': viewer['xsrft'],
}), headers={
'X-Requested-With': 'XMLHttpRequest',
})['hashed_pass']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise ExtractorError('Wrong password', expected=True)
raise
entries = OnDemandPagedList(functools.partial( entries = OnDemandPagedList(functools.partial(
self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE) self._fetch_page, album_id, hashed_pass, is_embed, referer), self._PAGE_SIZE)
return self.playlist_result( return self.playlist_result(
entries, album_id, album.get('name'), album.get('description')) entries, album_id, album.get('name'), album.get('description'))

Loading…
Cancel
Save