From c1c9bb4adb42d0d93a2fb5d93a7de0a87b6ba884 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 5 Jul 2024 13:32:53 -0500 Subject: [PATCH] [ie/vimeo] Fix password-protected video extraction (#10341) Closes #6603 Authored by: bashonly --- yt_dlp/extractor/vimeo.py | 87 ++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index a4ab7e24a..18eb08444 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,6 +1,7 @@ import base64 import functools import itertools +import json import re import urllib.parse @@ -14,6 +15,7 @@ from ..utils import ( determine_ext, get_element_by_class, int_or_none, + join_nonempty, js_to_json, merge_dicts, parse_filesize, @@ -84,29 +86,23 @@ class VimeoBaseInfoExtractor(InfoExtractor): expected=True) return password - def _verify_video_password(self, url, video_id, password, token, vuid): - if url.startswith('http://'): - # vimeo only supports https now, but the user can give an http url - url = url.replace('http://', 'https://') - self._set_vimeo_cookie('vuid', vuid) - return self._download_webpage( - url + '/password', video_id, 'Verifying the password', - 'Wrong password', data=urlencode_postdata({ - 'password': password, - 'token': token, - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': url, - }) - - def _extract_xsrft_and_vuid(self, webpage): - xsrft = self._search_regex( - r'(?:(?P["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P["\'])(?P.+?)(?P=q)', - webpage, 'login token', group='xsrft') - vuid = self._search_regex( - r'["\']vuid["\']\s*:\s*(["\'])(?P.+?)\1', - webpage, 'vuid', group='vuid') - return xsrft, vuid + def _verify_video_password(self, video_id, password, token): + url = f'https://vimeo.com/{video_id}' + try: + return self._download_webpage( + f'{url}/password', video_id, + 'Submitting video password', data=json.dumps({ + 'password': password, + 'token': token, + }, separators=(',', ':')).encode(), headers={ + 'Accept': '*/*', + 'Content-Type': 'application/json', + 'Referer': url, + }, impersonate=True) + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 418: + raise ExtractorError('Wrong password', expected=True) + raise def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( @@ -745,21 +741,34 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('Wrong video password', expected=True) return checked - def _extract_from_api(self, video_id, unlisted_hash=None): - token = self._download_json( - 'https://vimeo.com/_rv/jwt', video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest', - })['token'] - api_url = 'https://api.vimeo.com/videos/' + video_id - if unlisted_hash: - api_url += ':' + unlisted_hash - video = self._download_json( - api_url, video_id, headers={ - 'Authorization': 'jwt ' + token, + def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None): + return self._download_json( + join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'), + video_id, 'Downloading API JSON', headers={ + 'Authorization': f'jwt {jwt_token}', 'Accept': 'application/json', }, query={ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', }) + + def _extract_from_api(self, video_id, unlisted_hash=None): + viewer = self._download_json( + 'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info') + + for retry in (False, True): + try: + video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) + except ExtractorError as e: + if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 + and 'password' in traverse_obj( + e.cause.response.read(), + ({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'), + )): + self._verify_video_password( + video_id, self._get_video_password(), viewer['xsrft']) + continue + raise + info = self._parse_config(self._download_json( video['config_url'], video_id), video_id) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) @@ -865,12 +874,6 @@ class VimeoIE(VimeoBaseInfoExtractor): redirect_url, video_id, headers) return self._parse_config(config, video_id) - if re.search(r']+?id="pw_form"', webpage): - video_password = self._get_video_password() - token, vuid = self._extract_xsrft_and_vuid(webpage) - webpage = self._verify_video_password( - redirect_url, video_id, video_password, token, vuid) - vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: seed_status = vimeo_config.get('seed_status') or {} @@ -1290,9 +1293,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): video_password = self._get_video_password() viewer = self._download_json( 'https://vimeo.com/_rv/viewer', video_id) - webpage = self._verify_video_password( - 'https://vimeo.com/' + video_id, video_id, - video_password, viewer['xsrft'], viewer['vuid']) + webpage = self._verify_video_password(video_id, video_password, viewer['xsrft']) clip_page_config = self._parse_json(self._search_regex( r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', webpage, 'clip page config'), video_id)