[utils] Introduce base_url

pull/2/head
Sergey M․ 8 years ago
parent 639e3b5c99
commit 02dc0a36b7
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@ -69,6 +69,7 @@ from youtube_dl.utils import (
uppercase_escape, uppercase_escape,
lowercase_escape, lowercase_escape,
url_basename, url_basename,
base_url,
urlencode_postdata, urlencode_postdata,
urshift, urshift,
update_url_query, update_url_query,
@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
'trailer.mp4') 'trailer.mp4')
def test_base_url(self):
    # (input URL, expected result) pairs; in every case the expected value
    # ends at the last '/' that precedes any query string.
    cases = (
        ('http://foo.de/', 'http://foo.de/'),
        ('http://foo.de/bar', 'http://foo.de/'),
        ('http://foo.de/bar/', 'http://foo.de/bar/'),
        ('http://foo.de/bar/baz', 'http://foo.de/bar/'),
        ('http://foo.de/bar/baz?x=z/x/c', 'http://foo.de/bar/'),
    )
    for url, expected in cases:
        self.assertEqual(base_url(url), expected)
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None) self.assertEqual(parse_age_limit(False), None)

@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
age_restricted, age_restricted,
base_url,
bug_reports_message, bug_reports_message,
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
@ -1539,7 +1540,7 @@ class InfoExtractor(object):
if res is False: if res is False:
return [] return []
mpd, urlh = res mpd, urlh = res
mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group() mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats( return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
@ -1797,7 +1798,7 @@ class InfoExtractor(object):
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
return [] return []
ism_base_url = re.match(r'https?://.+/', ism_url).group() ism_base_url = base_url(ism_url)
duration = int(ism_doc.attrib['Duration']) duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000

@ -1691,6 +1691,10 @@ def url_basename(url):
return path.strip('/').split('/')[-1] return path.strip('/').split('/')[-1]
def base_url(url):
    """Return url truncated after the last '/' of its path.

    Everything from the first '?' or '#' onwards is ignored, so slashes
    inside a query string or fragment do not affect the result, e.g.
    'http://foo.de/bar/baz?x=z/x/c' -> 'http://foo.de/bar/'.

    Raises AttributeError when url does not match (it is not an http(s)
    URL, or it has no '/' after the host part).
    """
    # '&' is a legal character inside a path segment, so only '?' and '#'
    # may terminate the path portion of the URL.
    return re.match(r'https?://[^?#]+/', url).group()
class HEADRequest(compat_urllib_request.Request): class HEADRequest(compat_urllib_request.Request):
def get_method(self): def get_method(self):
return 'HEAD' return 'HEAD'

Loading…
Cancel
Save