diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index b22bea5204..a438d148ab 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -28,7 +28,6 @@ from ..utils import ( qualities, smuggle_url, str_or_none, - traverse_obj, try_call, try_get, unified_timestamp, @@ -39,6 +38,7 @@ from ..utils import ( urlhandle_detect_ext, urljoin, ) +from ..utils.traversal import require, traverse_obj class VimeoBaseInfoExtractor(InfoExtractor): @@ -117,13 +117,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _jwt_is_expired(self, token): return jwt_decode_hs256(token)['exp'] - time.time() < 120 - def _fetch_viewer_info(self, display_id=None, fatal=True): + def _fetch_viewer_info(self, display_id=None): if self._viewer_info and not self._jwt_is_expired(self._viewer_info['jwt']): return self._viewer_info self._viewer_info = self._download_json( 'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info', - 'Failed to download web token info', fatal=fatal, headers={'Accept': 'application/json'}) + 'Failed to download web token info', headers={'Accept': 'application/json'}) return self._viewer_info @@ -502,6 +502,43 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'quality': 1, } + @staticmethod + def _get_embed_params(is_embed, referer): + return { + 'is_embed': 'true' if is_embed else 'false', + 'referrer': urllib.parse.urlparse(referer).hostname if referer and is_embed else '', + } + + def _get_album_data_and_hashed_pass(self, album_id, is_embed, referer): + viewer = self._fetch_viewer_info(album_id) + jwt = viewer['jwt'] + album = self._download_json( + 'https://api.vimeo.com/albums/' + album_id, + album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, + query={**self._get_embed_params(is_embed, referer), 'fields': 'description,name,privacy'}) + hashed_pass = None + if traverse_obj(album, ('privacy', 'view')) == 'password': + password = self.get_param('videopassword') + if not password: + raise 
ExtractorError( + 'This album is protected by a password, use the --video-password option', + expected=True) + try: + hashed_pass = self._download_json( + f'https://vimeo.com/showcase/{album_id}/auth', + album_id, 'Verifying the password', data=urlencode_postdata({ + 'password': password, + 'token': viewer['xsrft'], + }), headers={ + 'X-Requested-With': 'XMLHttpRequest', + })['hashed_pass'] + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + raise ExtractorError('Wrong password', expected=True) + raise + + return album, hashed_pass + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -1188,42 +1225,6 @@ class VimeoIE(VimeoBaseInfoExtractor): info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total'])) return info - def _try_album_password(self, url): - album_id = self._search_regex( - r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None) - if not album_id: - return - viewer = self._fetch_viewer_info(album_id, fatal=False) - if not viewer: - webpage = self._download_webpage(url, album_id) - viewer = self._parse_json(self._search_regex( - r'bootstrap_data\s*=\s*({.+?})', - webpage, 'bootstrap data'), album_id)['viewer'] - jwt = viewer['jwt'] - album = self._download_json( - 'https://api.vimeo.com/albums/' + album_id, - album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, - query={'fields': 'description,name,privacy'}) - if try_get(album, lambda x: x['privacy']['view']) == 'password': - password = self.get_param('videopassword') - if not password: - raise ExtractorError( - 'This album is protected by a password, use the --video-password option', - expected=True) - try: - self._download_json( - f'https://vimeo.com/showcase/{album_id}/auth', - album_id, 'Verifying the password', data=urlencode_postdata({ - 'password': password, - 'token': viewer['xsrft'], - }), headers={ - 'X-Requested-With': 'XMLHttpRequest', - }) - except 
ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - raise ExtractorError('Wrong password', expected=True) - raise - def _real_extract(self, url): url, data, headers = self._unsmuggle_headers(url) if 'Referer' not in headers: @@ -1238,8 +1239,14 @@ class VimeoIE(VimeoBaseInfoExtractor): if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id - self._try_album_password(url) - is_secure = urllib.parse.urlparse(url).scheme == 'https' + album_id = self._search_regex( + r'vimeo\.com/(?:album|showcase)/([0-9]+)/', url, 'album id', default=None) + if album_id: + # Detect password-protected showcase video => POST album password => set cookies + self._get_album_data_and_hashed_pass(album_id, False, None) + + parsed_url = urllib.parse.urlparse(url) + is_secure = parsed_url.scheme == 'https' try: # Retrieve video webpage to extract further information webpage, urlh = self._download_webpage_handle( @@ -1265,7 +1272,7 @@ class VimeoIE(VimeoBaseInfoExtractor): f'{self._downloader._format_err("compromising your security/cookies", "light red")}, ' f'try replacing "https:" with "http:" in the input URL. 
{dcip_msg}.', expected=True) - if '://player.vimeo.com/video/' in url: + if parsed_url.hostname == 'player.vimeo.com': config = self._search_json( r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id) if config.get('view') == 4: @@ -1293,7 +1300,7 @@ class VimeoIE(VimeoBaseInfoExtractor): config_url = None channel_id = self._search_regex( - r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) + r'vimeo\.com/channels/([^/?#]+)', url, 'channel id', default=None) if channel_id: config_url = self._extract_config_url(webpage, default=None) video_description = clean_html(get_element_by_class('description', webpage)) @@ -1531,7 +1538,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE class VimeoAlbumIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>[^/?#]+)(?:$|[?#]|(?P<is_embed>/embed))' _TITLE_RE = r'