Remove the calls to 'compat_urllib_request.urlopen' in a few extractors

12 years ago · baa7b1978b
parent ac5118bcb9
commit baa7b1978b
5 changed files with 42 additions and 86 deletions
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
        urlp = compat_urllib_parse_urlparse(url)
        if urlp.path.startswith('/play/'):
-            request = compat_urllib_request.Request(url)
+            response = self._request_webpage(url, None, False)
            response = compat_urllib_request.urlopen(request)
            redirecturl = response.geturl()
            rurlp = compat_urllib_parse_urlparse(redirecturl)
            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(mobj.group(1))
        info = None
-        try:
+        urlh = self._request_webpage(request, None, False,
-            urlh = compat_urllib_request.urlopen(request)
+            u'unable to download video info webpage')
-            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
+        if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
-                basename = url.split('/')[-1]
+            basename = url.split('/')[-1]
-                title,ext = os.path.splitext(basename)
+            title,ext = os.path.splitext(basename)
-                title = title.decode('UTF-8')
+            title = title.decode('UTF-8')
-                ext = ext.replace('.', '')
+            ext = ext.replace('.', '')
-                self.report_direct_download(title)
+            self.report_direct_download(title)
-                info = {
+            info = {
-                    'id': title,
+                'id': title,
-                    'url': url,
+                'url': url,
-                    'uploader': None,
+                'uploader': None,
-                    'upload_date': None,
+                'upload_date': None,
-                    'title': title,
+                'title': title,
-                    'ext': ext,
+                'ext': ext,
-                    'urlhandle': urlh
+                'urlhandle': urlh
-                }
+            }
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
        if info is None: # Regular URL
            try:
                json_code_bytes = urlh.read()
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -1,11 +1,8 @@
 import re
 import socket
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_parse_qs,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_str,
@ -93,12 +90,8 @@ class MetacafeIE(InfoExtractor):
    def _real_initialize(self):
        # Retrieve disclaimer
-        request = compat_urllib_request.Request(self._DISCLAIMER)
+        self.report_disclaimer()
-        try:
+        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
            self.report_disclaimer()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
        # Confirm age
        disclaimer_form = {
@ -107,11 +100,8 @@ class MetacafeIE(InfoExtractor):
            }
        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        try:
+        self.report_age_confirmation()
-            self.report_age_confirmation()
+        self._download_webpage(request, None, False, u'Unable to confirm age')
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    def _real_extract(self, url):
        # Extract id and simplified title from URL
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -1,13 +1,10 @@
 import json
 import re
 import socket
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_urllib_error,
    compat_urllib_request,
    unified_strdate,
    ExtractorError,
 )
@ -31,9 +28,11 @@ class MixcloudIE(InfoExtractor):
        """Returns 1st active url from list"""
        for url in url_list:
            try:
-                compat_urllib_request.urlopen(url)
+                # We only want to know if the request succeeds
                # don't download the whole file
                self._request_webpage(url, None, False)
                return url
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
+            except ExtractorError:
                url = None
        return None
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@ -1,13 +1,8 @@
 import re
 import socket
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    ExtractorError,
    orderedSet,
@ -45,11 +40,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
-            try:
+            mdoc = self._download_xml(xmlUrl, info['id'])
                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
            mdoc = xml.etree.ElementTree.fromstring(metaXml)
            try:
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@ -95,12 +86,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                'upload_date': None,
            }
            self.report_download_webpage(info['id'])
            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
-            try:
+            rootpage = self._download_webpage(rootURL, info['id'],
-                rootpage = compat_urllib_request.urlopen(rootURL).read()
+                errnote=u'Unable to download course info page')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
            info['title'] = info['id']
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,7 +7,6 @@ import itertools
 import json
 import os.path
 import re
 import socket
 import string
 import struct
 import traceback
@ -17,9 +16,7 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_chr,
    compat_http_client,
    compat_parse_qs,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
@ -53,9 +50,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
-            compat_urllib_request.urlopen(request).read()
+            self._download_webpage(self._LANG_URL, None, False)
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+        except ExtractorError as err:
-            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
            return False
        return True
@ -67,12 +64,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False
-        request = compat_urllib_request.Request(self._LOGIN_URL)
+        login_page = self._download_webpage(self._LOGIN_URL, None, False,
-        try:
+            u'Unable to fetch login page')
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')
@ -105,12 +98,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            login_results = self._download_webpage(request, None, False)
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+        except ExtractorError as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
            return False
        return True
@ -120,11 +113,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                'action_confirm':   'Confirm',
                }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
-        try:
+        self.report_age_confirmation()
-            self.report_age_confirmation()
+        self._download_webpage(request, None, False, u'Unable to confirm age')
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True
    def _real_initialize(self):
@ -1737,10 +1727,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
@ -1749,13 +1735,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
        limit = n
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
-            request = compat_urllib_request.Request(result_url)
+            data = self._download_webpage(result_url, u'query "%s"' % query,
-            try:
+                u'Downloading page %s' % pagenum, u'Unable to download API page')
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']
            if not 'items' in api_response: