[theonion] Modernize

pull/2/head
Philipp Hagemeister 10 years ago
parent 383456aa29
commit e880c66bd8

@ -4,11 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError
class TheOnionIE(InfoExtractor): class TheOnionIE(InfoExtractor):
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
_TEST = { _TEST = {
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
'md5': '19eaa9a39cf9b9804d982e654dc791ee', 'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
article_id = mobj.group('article_id') webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, article_id)
video_id = self._search_regex( video_id = self._search_regex(
r'"videoId":\s(\d+),', webpage, 'video ID') r'"videoId":\s(\d+),', webpage, 'video ID')
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
if not sources:
raise ExtractorError(
'No sources found for video %s' % video_id, expected=True)
formats = [] formats = []
for src, type_ in sources: for src, type_ in sources:
if type_ == 'video/mp4': if type_ == 'video/mp4':
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
}) })
elif type_ == 'application/x-mpegURL': elif type_ == 'application/x-mpegURL':
formats.extend( formats.extend(
self._extract_m3u8_formats(src, video_id, preference=-1)) self._extract_m3u8_formats(src, display_id, preference=-1))
else: else:
self.report_warning( self.report_warning(
'Encountered unexpected format: %s' % type_) 'Encountered unexpected format: %s' % type_)
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,

Loading…
Cancel
Save