@ -6,109 +6,28 @@ import re
from . common import InfoExtractor
from . common import InfoExtractor
from . . utils import (
from . . utils import (
ExtractorError ,
ExtractorError ,
month_by_name ,
int_or_none ,
int_or_none ,
month_by_name ,
unified_strdate ,
)
)
class ScreenwaveMediaIE ( InfoExtractor ) :
# URL pattern with three alternatives, each wrapped in its own named group so
# the caller can tell which one matched:
#   generic      - a player.screenwavemedia.com embed URL (captures video_id)
#   cinemassacre - a dated cinemassacre.com post (captures date parts + slug)
#   teamfourstar - a teamfourstar.com video page (captures the slug)
_VALID_URL = (
    r'(?:http://)?(?'
    r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))'
    r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))'
    r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)'
    r')'
)

# Sample URLs with the metadata expected for each.
# NOTE(review): presumably consumed by the project's extractor test harness - confirm.
_TESTS = [
    {
        'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
        'md5': 'fde81fbafaee331785f58cd6c0d46190',
        'info_dict': {
            # Spelled "Cinemassacre" (two s's before "acre"), matching the
            # id= value carried by the player embed URL.
            'id': 'Cinemassacre-19911',
            'ext': 'mp4',
            'upload_date': '20121110',
            'title': '“Angry Video Game Nerd: The Movie” – Trailer',
            'description': 'md5:fb87405fcb42a331742a0dce2708560b',
        },
    },
    {
        'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
        'md5': 'd72f10cd39eac4215048f62ab477a511',
        'info_dict': {
            'id': 'Cinemassacre-521be8ef82b16',
            'ext': 'mp4',
            'upload_date': '20131002',
            'title': 'The Mummy’s Hand (1940)',
        },
    },
]
def _cinemassacre_get_info(self, url):
    """Collect page-level metadata for a cinemassacre.com post.

    Matches ``url`` against ``self._VALID_URL``, downloads the post page and
    pulls the title, description, thumbnail and the embedded
    player.screenwavemedia.com URL out of the HTML.

    :param url: post URL; it must match the ``cinemassacre`` alternative of
        ``self._VALID_URL``, because the ``cm_*`` groups are read without
        checking the match.
    :returns: dict with ``title``, ``description``, ``upload_date``,
        ``thumbnail`` and ``_embed_url`` (the player URL found in the page).
    :raises ExtractorError: if no player ``src="..."`` attribute is found.
    """
    mobj = re.match(self._VALID_URL, url)
    display_id = mobj.group('cm_display_id')

    webpage = self._download_webpage(url, display_id)

    # The upload date is taken from the URL path (/YYYY/MM/DD/), not the page.
    video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')

    mobj = re.search(
        r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
        webpage)
    if not mobj:
        raise ExtractorError('Can\'t extract embed url and video id')
    playerdata_url = mobj.group('embed_url')

    # Title is the <title> text up to the first "|".
    video_title = self._html_search_regex(
        r'<title>(?P<title>.+?)\|', webpage, 'title')
    # Description lookup is passed fatal=False, unlike the title lookup.
    video_description = self._html_search_regex(
        r'<div class="entry-content">(?P<description>.+?)</div>',
        webpage, 'description', flags=re.DOTALL, fatal=False)
    video_thumbnail = self._og_search_thumbnail(webpage)

    return {
        'title': video_title,
        'description': video_description,
        'upload_date': video_date,
        'thumbnail': video_thumbnail,
        '_embed_url': playerdata_url,
    }
def _teamfourstar_get_info ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
display_id = mobj . group ( ' tfs_display_id ' )
webpage = self . _download_webpage ( url , display_id )
mobj = re . search ( r ' src= " (?P<embed_url>http://player \ .screenwavemedia \ .com/play/[a-zA-Z]+ \ .php \ ?[^ " ]* \ bid=.+?) " ' , webpage )
if not mobj :
raise ExtractorError ( ' Can \' t extract embed url and video id ' )
playerdata_url = mobj . group ( ' embed_url ' )
video_title = self . _html_search_regex (
r ' <div class= " heroheadingtitle " >(?P<title>.+?)</div> ' , webpage , ' title ' )
video_date = self . _html_search_regex (
r ' <div class= " heroheadingdate " >(?P<date>.+?)</div> ' , webpage , ' date ' )
mobj = re . match ( ' (?P<month>[A-Z][a-z]+) (?P<day> \ d+), (?P<year> \ d+) ' , video_date )
video_date = ' %04u %02u %02u ' % ( int ( mobj . group ( ' year ' ) ) , month_by_name ( mobj . group ( ' month ' ) ) , int ( mobj . group ( ' day ' ) ) )
video_description = self . _html_search_regex (
r ' <div class= " postcontent " >(?P<description>.+?)</div> ' , webpage , ' description ' , flags = re . DOTALL )
video_thumbnail = self . _og_search_thumbnail ( webpage )
return {
class ScreenwaveMediaIE ( InfoExtractor ) :
' title ' : video_title ,
_VALID_URL = r ' http://player \ .screenwavemedia \ .com/play/[a-zA-Z]+ \ .php \ ?[^ " ]* \ bid=(?P<id>.+) '
' description ' : video_description ,
' upload_date ' : video_date ,
' thumbnail ' : video_thumbnail ,
' _embed_url ' : playerdata_url ,
}
def _screenwavemedia_get_info ( self , url ) :
_TESTS = [ {
mobj = re . match ( self . _VALID_URL , url )
' url ' : ' http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911 ' ,
if not mobj :
' only_matching ' : True ,
raise ExtractorError ( ' Can \' t extract embed url and video id ' )
} ]
video_id = mobj . group ( ' video_id ' )
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
playerdata = self . _download_webpage ( url , video_id , ' Downloading player webpage ' )
playerdata = self . _download_webpage ( url , video_id , ' Downloading player webpage ' )
vidtitle = self . _search_regex (
vidtitle = self . _search_regex (
r ' \' vidtitle \' \ s*: \ s* " ([^ \' ]+) " ' , playerdata , ' vidtitle ' ) . replace ( ' \\ / ' , ' / ' )
r ' \' vidtitle \' \ s*: \ s* " ([^ " ]+) " ' , playerdata , ' vidtitle ' ) . replace ( ' \\ / ' , ' / ' )
vidurl = self . _search_regex (
vidurl = self . _search_regex (
r ' \' vidurl \' \ s*: \ s* " ([^ \' ]+) " ' , playerdata , ' vidurl ' ) . replace ( ' \\ / ' , ' / ' )
r ' \' vidurl \' \ s*: \ s* " ([^ " ]+) " ' , playerdata , ' vidurl ' ) . replace ( ' \\ / ' , ' / ' )
pageurl = self . _search_regex (
r ' \' pageurl \' \ s*: \ s* " ([^ \' ]+) " ' , playerdata , ' pageurl ' , fatal = False ) . replace ( ' \\ / ' , ' / ' )
videolist_url = None
videolist_url = None
@ -134,61 +53,128 @@ class ScreenwaveMediaIE(InfoExtractor):
file_ = src . partition ( ' : ' ) [ - 1 ]
file_ = src . partition ( ' : ' ) [ - 1 ]
width = int_or_none ( video . get ( ' width ' ) )
width = int_or_none ( video . get ( ' width ' ) )
height = int_or_none ( video . get ( ' height ' ) )
height = int_or_none ( video . get ( ' height ' ) )
bitrate = int_or_none ( video . get ( ' system-bitrate ' ) )
bitrate = int_or_none ( video . get ( ' system-bitrate ' ) , scale = 1000 )
format = {
format = {
' url ' : baseurl + file_ ,
' url ' : baseurl + file_ ,
' format_id ' : src . rpartition ( ' . ' ) [ 0 ] . rpartition ( ' _ ' ) [ - 1 ] ,
' format_id ' : src . rpartition ( ' . ' ) [ 0 ] . rpartition ( ' _ ' ) [ - 1 ] ,
}
}
if width or height :
if width or height :
format . update ( {
format . update ( {
' tbr ' : bitrate / / 1000 if bitrate else None ,
' tbr ' : bitrate ,
' width ' : width ,
' width ' : width ,
' height ' : height ,
' height ' : height ,
} )
} )
else :
else :
format . update ( {
format . update ( {
' abr ' : bitrate / / 1000 if bitrate else None ,
' abr ' : bitrate ,
' vcodec ' : ' none ' ,
' vcodec ' : ' none ' ,
} )
} )
formats . append ( format )
formats . append ( format )
self . _sort_formats ( formats )
else :
else :
formats = [ {
formats = [ {
' url ' : vidurl ,
' url ' : vidurl ,
} ]
} ]
self . _sort_formats ( formats )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : vidtitle ,
' title ' : vidtitle ,
' formats ' : formats ,
' formats ' : formats ,
' _episode_page ' : pageurl ,
}
}
class CinemassacreIE ( InfoExtractor ) :
# Dated cinemassacre.com post URL: /YYYY/MM/DD/<slug>.
# Raw string so the "\." escapes reach the regex engine untouched and do not
# trigger Python's invalid-escape-sequence warning.
_VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'

# Sample URLs with the metadata expected for each.
# NOTE(review): presumably consumed by the project's extractor test harness - confirm.
_TESTS = [
    {
        'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
        'md5': 'fde81fbafaee331785f58cd6c0d46190',
        'info_dict': {
            'id': 'Cinemassacre-19911',
            'ext': 'mp4',
            'upload_date': '20121110',
            'title': '“Angry Video Game Nerd: The Movie” – Trailer',
            'description': 'md5:fb87405fcb42a331742a0dce2708560b',
        },
    },
    {
        'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
        'md5': 'd72f10cd39eac4215048f62ab477a511',
        'info_dict': {
            'id': 'Cinemassacre-521be8ef82b16',
            'ext': 'mp4',
            'upload_date': '20131002',
            'title': 'The Mummy’s Hand (1940)',
        },
    },
]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
mobj = re . match ( self . _VALID_URL , url )
display_id = mobj . group ( ' display_id ' )
video_date = mobj . group ( ' date_y ' ) + mobj . group ( ' date_m ' ) + mobj . group ( ' date_d ' )
swm_info = None
webpage = self . _download_webpage ( url , display_id )
site_info = None
if mobj . group ( ' generic ' ) :
playerdata_url = self . _search_regex (
swm_info = self . _screenwavemedia_get_info ( url )
r ' src= " (http://player \ .screenwavemedia \ .com/play/[a-zA-Z]+ \ .php \ ?[^ " ]* \ bid=.+?) " ' ,
url = swm_info [ ' _episode_page ' ]
webpage , ' player data URL ' )
mobj = re . match ( self . _VALID_URL , url )
video_title = self . _html_search_regex (
r ' <title>(?P<title>.+?) \ | ' , webpage , ' title ' )
video_description = self . _html_search_regex (
r ' <div class= " entry-content " >(?P<description>.+?)</div> ' ,
webpage , ' description ' , flags = re . DOTALL , fatal = False )
video_thumbnail = self . _og_search_thumbnail ( webpage )
if mobj :
return {
if mobj . group ( ' cinemassacre ' ) :
' _type ' : ' url_transparent ' ,
site_info = self . _cinemassacre_get_info ( url )
' display_id ' : display_id ,
elif mobj . group ( ' teamfourstar ' ) :
' title ' : video_title ,
site_info = self . _teamfourstar_get_info ( url )
' description ' : video_description ,
' upload_date ' : video_date ,
' thumbnail ' : video_thumbnail ,
' url ' : playerdata_url ,
}
if not swm_info :
if site_info :
swm_info = self . _screenwavemedia_get_info ( site_info [ ' _embed_url ' ] )
if not swm_info :
class TeamFourIE ( InfoExtractor ) :
raise ExtractorError ( " Failed to extract metadata for this URL " )
# teamfourstar.com video page; the slug after /video/ is captured as "id".
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'

# Sample URL with the metadata expected for it.
# NOTE(review): presumably consumed by the project's extractor test harness - confirm.
_TEST = {
    'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
    'info_dict': {
        'id': 'TeamFourStar-5292a02f20bfa',
        'ext': 'mp4',
        'upload_date': '20130401',
        'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
        'title': 'A Moment With TFS Episode 4',
    },
}
if site_info :
def _real_extract ( self , url ) :
swm_info . update ( site_info )
display_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , display_id )
playerdata_url = self . _search_regex (
r ' src= " (http://player \ .screenwavemedia \ .com/play/[a-zA-Z]+ \ .php \ ?[^ " ]* \ bid=.+?) " ' ,
webpage , ' player data URL ' )
video_title = self . _html_search_regex (
r ' <div class= " heroheadingtitle " >(?P<title>.+?)</div> ' ,
webpage , ' title ' )
video_date = unified_strdate ( self . _html_search_regex (
r ' <div class= " heroheadingdate " >(?P<date>.+?)</div> ' ,
webpage , ' date ' , fatal = False ) )
video_description = self . _html_search_regex (
r ' (?s)<div class= " postcontent " >(?P<description>.+?)</div> ' ,
webpage , ' description ' , fatal = False )
video_thumbnail = self . _og_search_thumbnail ( webpage )
return swm_info
return {
' _type ' : ' url_transparent ' ,
' display_id ' : display_id ,
' title ' : video_title ,
' description ' : video_description ,
' upload_date ' : video_date ,
' thumbnail ' : video_thumbnail ,
' url ' : playerdata_url ,
}