Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality

pull/8/head
Jaime Marquínez Ferrándiz 11 years ago
parent 3d60d33773
commit 4b7b839f24

@ -94,6 +94,7 @@ from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .sina import SinaIE from .sina import SinaIE

@ -4,6 +4,7 @@ import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse, compat_urlparse,
compat_urllib_parse,
xpath_with_ns, xpath_with_ns,
determine_ext, determine_ext,
) )
@ -26,6 +27,16 @@ class InternetVideoArchiveIE(InfoExtractor):
def _build_url(query): def _build_url(query):
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
    """Return a reduced, url-encoded version of the query string ``query``.

    Only the ``publishedid`` and ``customerid`` arguments are carried over
    (first value of each); ``playerid`` and ``videokbrate`` are always set
    to fixed values.
    """
    wanted = ['publishedid', 'customerid']
    params = {}
    for name, values in compat_urlparse.parse_qs(query).items():
        if name in wanted:
            # parse_qs yields a list of values per key; keep the first one
            params[name] = values[0]
    # Other player ids return m3u8 urls
    params['playerid'] = '247'
    # NOTE(review): presumably a very high bitrate is requested so the
    # server answers with the best available quality - confirm
    params['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(params)
def _real_extract(self, url): def _real_extract(self, url):
query = compat_urlparse.urlparse(url).query query = compat_urlparse.urlparse(url).query
query_dic = compat_urlparse.parse_qs(query) query_dic = compat_urlparse.parse_qs(query)
@ -37,6 +48,11 @@ class InternetVideoArchiveIE(InfoExtractor):
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
# and http links (no m3u8 manifests)
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info_xml = self._download_webpage(file_url, video_id, info_xml = self._download_webpage(file_url, video_id,
u'Downloading video info') u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

@ -0,0 +1,16 @@
from .videodetective import VideoDetectiveIE
# It uses the same method as videodetective.com:
# the internetvideoarchive.com URL is extracted from the og:video property
class RottenTomatoesIE(VideoDetectiveIE):
    # All extraction logic is inherited from VideoDetectiveIE; this class
    # only supplies the URL pattern and the test data.

    # Trailer pages; the numeric part after /trailers/ is captured as "id"
    _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'

    # Sample trailer page with the file name and info fields listed for it
    _TEST = {
        u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        u'file': '613340.mp4',
        u'info_dict': {
            u'title': u'TOY STORY 3',
            u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
        },
    }

@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor):
u'info_dict': { u'info_dict': {
u'title': u'KICK-ASS 2', u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013', u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 135, u'duration': 138,
}, },
} }

Loading…
Cancel
Save