From befa4708fd2165b85d04002c3845adf191d34302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Feb 2018 22:50:23 +0700 Subject: [PATCH] [utils] Fixup some common URL's typos in sanitize_url (closes #15649) --- test/test_utils.py | 7 +++++++ youtube_dl/utils.py | 18 +++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fdf6031f7..d8d257d1d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -57,6 +57,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + sanitize_url, expand_path, prepend_extension, replace_extension, @@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_url(self): + self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') + self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') + self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + def test_expand_path(self): def env(var): return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7f24cbb04..af639a124 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -538,10 +538,22 @@ def sanitize_path(s): return os.path.join(*sanitized_path) -# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of -# unwanted failures due to missing protocol def sanitize_url(url): - return 'http:%s' % url if url.startswith('//') else url + # Prepend protocol-less URLs with `http:` scheme in order to mitigate + # the number of unwanted failures due to missing protocol + if url.startswith('//'): + return 'http:%s' % url + # Fix some common typos seen so far + COMMON_TYPOS = ( + # https://github.com/rg3/youtube-dl/issues/15649 + (r'^httpss://', r'https://'), + # https://bx1.be/lives/direct-tv/ + (r'^rmtp([es]?)://', r'rtmp\1://'), + ) + for mistake, fixup in COMMON_TYPOS: + if re.match(mistake, url): + return re.sub(mistake, fixup, url) + return url def sanitized_Request(url, *args, **kwargs):