diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b20837ce28..eedab37a7e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -39,7 +39,6 @@ from .compat import ( compat_urllib_error, compat_urllib_request, compat_urllib_request_DataHandler, - compat_urllib_request_Request, ) from .utils import ( ContentTooShortError, @@ -65,6 +64,7 @@ from .utils import ( SameFileError, sanitize_filename, sanitize_path, + sanitized_Request, std_headers, subtitles_filename, UnavailableVideoError, @@ -1874,7 +1874,7 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, compat_basestring): - req = compat_urllib_request_Request(req) + req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 4e3de7f517..a3e85264ac 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -198,14 +198,6 @@ except ImportError: # Python < 3.4 return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) - -# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of -# unwanted failures due to missing protocol -def compat_urllib_request_Request(url, *args, **kwargs): - return compat_urllib_request.Request( - 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) - - try: compat_basestring = basestring # Python 2 except NameError: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index d1daf96729..f392ccf1cd 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -8,7 +8,6 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_request_Request, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import ( InAdvancePagedList, int_or_none, RegexNotFoundError, + sanitized_Request, smuggle_url, std_headers, unified_strdate, @@ 
-47,7 +47,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'service': 'vimeo', 'token': token, })) - login_request = compat_urllib_request_Request(self._LOGIN_URL, data) + login_request = sanitized_Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Referer', self._LOGIN_URL) self._set_vimeo_cookie('vuid', vuid) @@ -222,7 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if url.startswith('http://'): # vimeo only supports https now, but the user can give an http url url = url.replace('http://', 'https://') - password_request = compat_urllib_request_Request(url + '/password', data) + password_request = sanitized_Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Referer', url) self._set_vimeo_cookie('vuid', vuid) @@ -236,7 +236,7 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('This video is protected by a password, use the --video-password option') data = urlencode_postdata(encode_dict({'password': password})) pass_url = url + '/check-password' - password_request = compat_urllib_request_Request(pass_url, data) + password_request = sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') return self._download_json( password_request, video_id, @@ -265,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information - request = compat_urllib_request_Request(url, None, headers) + request = sanitized_Request(url, None, headers) try: webpage = self._download_webpage(request, video_id) except ExtractorError as ee: @@ -481,7 +481,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): password_path = self._search_regex( r'action="([^"]+)"', login_form, 'password URL') password_url = compat_urlparse.urljoin(page_url, password_path) - password_request = 
compat_urllib_request_Request(password_url, post) + password_request = sanitized_Request(password_url, post) password_request.add_header('Content-type', 'application/x-www-form-urlencoded') self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('xsrft', token) @@ -640,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): def _page_url(self, base_url, pagenum): url = '%s/page:%d/' % (base_url, pagenum) - request = compat_urllib_request_Request(url) + request = sanitized_Request(url) # Set the header to get a partial html page with the ids, # the normal page doesn't contain them. request.add_header('X-Requested-With', 'XMLHttpRequest') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c0325f054d..d7b737e216 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -373,6 +373,13 @@ def sanitize_path(s): return os.path.join(*sanitized_path) +# Prepend the `http:` scheme to protocol-less URLs (those starting with `//`) in +# order to reduce the number of unwanted failures due to a missing protocol +def sanitized_Request(url, *args, **kwargs): + return compat_urllib_request.Request( + 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = []