[screenwavemedia] Simplify (#3766)

pull/8/head
Philipp Hagemeister 10 years ago
parent 807962f4a1
commit f17e4c9c28

@ -622,23 +622,17 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'), ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False) extra_info=extra_info, download=False, process=False)
def make_result(embedded_info):
new_result = ie_result.copy() new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats', for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
'entries', 'ie_key', 'duration', 'entries', 'ie_key', 'duration',
'subtitles', 'annotations', 'format', 'subtitles', 'annotations', 'format',
'thumbnail', 'thumbnails'): 'thumbnail', 'thumbnails'):
if f in new_result: if f in new_result:
del new_result[f] del new_result[f]
if f in embedded_info: if f in info:
new_result[f] = embedded_info[f] new_result[f] = info[f]
return new_result
new_result = make_result(info)
assert new_result.get('_type') != 'url_transparent' assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result( return self.process_ie_result(
new_result, download=download, extra_info=extra_info) new_result, download=download, extra_info=extra_info)

@ -335,7 +335,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE

@ -6,109 +6,28 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
month_by_name,
int_or_none, int_or_none,
month_by_name,
unified_strdate,
) )
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?' \
r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)' \
r')'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': {
'id': 'Cinemasssacre-19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': 'Cinemasssacre-521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
def _cinemassacre_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('cm_display_id')
webpage = self._download_webpage(url, display_id) class ScreenwaveMediaIE(InfoExtractor):
video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d') _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _teamfourstar_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('tfs_display_id')
webpage = self._download_webpage(url, display_id)
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>', webpage, 'title')
video_date = self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>', webpage, 'date')
mobj = re.match('(?P<month>[A-Z][a-z]+) (?P<day>\d+), (?P<year>\d+)', video_date)
video_date = '%04u%02u%02u' % (int(mobj.group('year')), month_by_name(mobj.group('month')), int(mobj.group('day')))
video_description = self._html_search_regex(
r'<div class="postcontent">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _screenwavemedia_get_info(self, url): _TESTS = [{
mobj = re.match(self._VALID_URL, url) 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
if not mobj: 'only_matching': True,
raise ExtractorError('Can\'t extract embed url and video id') }]
video_id = mobj.group('video_id')
def _real_extract(self, url):
video_id = self._match_id(url)
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
vidtitle = self._search_regex( vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/') r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex( vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
pageurl = self._search_regex(
r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
videolist_url = None videolist_url = None
@ -134,61 +53,128 @@ class ScreenwaveMediaIE(InfoExtractor):
file_ = src.partition(':')[-1] file_ = src.partition(':')[-1]
width = int_or_none(video.get('width')) width = int_or_none(video.get('width'))
height = int_or_none(video.get('height')) height = int_or_none(video.get('height'))
bitrate = int_or_none(video.get('system-bitrate')) bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
format = { format = {
'url': baseurl + file_, 'url': baseurl + file_,
'format_id': src.rpartition('.')[0].rpartition('_')[-1], 'format_id': src.rpartition('.')[0].rpartition('_')[-1],
} }
if width or height: if width or height:
format.update({ format.update({
'tbr': bitrate // 1000 if bitrate else None, 'tbr': bitrate,
'width': width, 'width': width,
'height': height, 'height': height,
}) })
else: else:
format.update({ format.update({
'abr': bitrate // 1000 if bitrate else None, 'abr': bitrate,
'vcodec': 'none', 'vcodec': 'none',
}) })
formats.append(format) formats.append(format)
self._sort_formats(formats)
else: else:
formats = [{ formats = [{
'url': vidurl, 'url': vidurl,
}] }]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': vidtitle, 'title': vidtitle,
'formats': formats, 'formats': formats,
'_episode_page': pageurl,
} }
class CinemassacreIE(InfoExtractor):
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': {
'id': 'Cinemassacre-19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': 'Cinemassacre-521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
swm_info = None webpage = self._download_webpage(url, display_id)
site_info = None
if mobj.group('generic'): playerdata_url = self._search_regex(
swm_info = self._screenwavemedia_get_info(url) r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
url = swm_info['_episode_page'] webpage, 'player data URL')
mobj = re.match(self._VALID_URL, url) video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
if mobj: return {
if mobj.group('cinemassacre'): '_type': 'url_transparent',
site_info = self._cinemassacre_get_info(url) 'display_id': display_id,
elif mobj.group('teamfourstar'): 'title': video_title,
site_info = self._teamfourstar_get_info(url) 'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}
if not swm_info:
if site_info:
swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])
if not swm_info: class TeamFourIE(InfoExtractor):
raise ExtractorError("Failed to extract metadata for this URL") _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
_TEST = {
'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
'info_dict': {
'id': 'TeamFourStar-5292a02f20bfa',
'ext': 'mp4',
'upload_date': '20130401',
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
'title': 'A Moment With TFS Episode 4',
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if site_info: playerdata_url = self._search_regex(
swm_info.update(site_info) r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return swm_info return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}

Loading…
Cancel
Save