@ -7,13 +7,13 @@ from .common import InfoExtractor
from . . utils import (
from . . utils import (
determine_ext ,
determine_ext ,
dict_get ,
dict_get ,
int_or_none ,
try_get ,
)
)
class SVTBaseIE ( InfoExtractor ) :
class SVTBaseIE ( InfoExtractor ) :
def _extract_video ( self , info , video_id ) :
def _extract_video ( self , video_info , video_id ) :
video_info = self . _get_video_info ( info )
formats = [ ]
formats = [ ]
for vr in video_info [ ' videoReferences ' ] :
for vr in video_info [ ' videoReferences ' ] :
player_type = vr . get ( ' playerType ' )
player_type = vr . get ( ' playerType ' )
@ -37,6 +37,8 @@ class SVTBaseIE(InfoExtractor):
' format_id ' : player_type ,
' format_id ' : player_type ,
' url ' : vurl ,
' url ' : vurl ,
} )
} )
if not formats and video_info . get ( ' rights ' , { } ) . get ( ' geoBlockedSweden ' ) :
self . raise_geo_restricted ( ' This video is only available in Sweden ' )
self . _sort_formats ( formats )
self . _sort_formats ( formats )
subtitles = { }
subtitles = { }
@ -52,15 +54,32 @@ class SVTBaseIE(InfoExtractor):
subtitles . setdefault ( subtitle_lang , [ ] ) . append ( { ' url ' : subtitle_url } )
subtitles . setdefault ( subtitle_lang , [ ] ) . append ( { ' url ' : subtitle_url } )
duration = video_info . get ( ' materialLength ' )
title = video_info . get ( ' title ' )
age_limit = 18 if video_info . get ( ' inappropriateForChildren ' ) else 0
series = video_info . get ( ' programTitle ' )
season_number = int_or_none ( video_info . get ( ' season ' ) )
episode = video_info . get ( ' episodeTitle ' )
episode_number = int_or_none ( video_info . get ( ' episodeNumber ' ) )
duration = int_or_none ( dict_get ( video_info , ( ' materialLength ' , ' contentDuration ' ) ) )
age_limit = None
adult = dict_get (
video_info , ( ' inappropriateForChildren ' , ' blockedForChildren ' ) ,
skip_false_values = False )
if adult is not None :
age_limit = 18 if adult else 0
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : title ,
' formats ' : formats ,
' formats ' : formats ,
' subtitles ' : subtitles ,
' subtitles ' : subtitles ,
' duration ' : duration ,
' duration ' : duration ,
' age_limit ' : age_limit ,
' age_limit ' : age_limit ,
' series ' : series ,
' season_number ' : season_number ,
' episode ' : episode ,
' episode_number ' : episode_number ,
}
}
@ -85,9 +104,6 @@ class SVTIE(SVTBaseIE):
if mobj :
if mobj :
return mobj . group ( ' url ' )
return mobj . group ( ' url ' )
def _get_video_info ( self , info ) :
return info [ ' video ' ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
mobj = re . match ( self . _VALID_URL , url )
widget_id = mobj . group ( ' widget_id ' )
widget_id = mobj . group ( ' widget_id ' )
@ -97,7 +113,7 @@ class SVTIE(SVTBaseIE):
' http://www.svt.se/wd?widgetId= %s &articleId= %s &format=json&type=embed&output=json ' % ( widget_id , article_id ) ,
' http://www.svt.se/wd?widgetId= %s &articleId= %s &format=json&type=embed&output=json ' % ( widget_id , article_id ) ,
article_id )
article_id )
info_dict = self . _extract_video ( info , article_id )
info_dict = self . _extract_video ( info [' video ' ] , article_id )
info_dict [ ' title ' ] = info [ ' context ' ] [ ' title ' ]
info_dict [ ' title ' ] = info [ ' context ' ] [ ' title ' ]
return info_dict
return info_dict
@ -105,7 +121,7 @@ class SVTIE(SVTBaseIE):
class SVTPlayIE ( SVTBaseIE ) :
class SVTPlayIE ( SVTBaseIE ) :
IE_DESC = ' SVT Play and Öppet arkiv '
IE_DESC = ' SVT Play and Öppet arkiv '
_VALID_URL = r ' https?://(?:www \ .)?(?:svtplay|oppetarkiv) \ .se/video/(?P<id>[0-9]+) '
_VALID_URL = r ' https?://(?:www \ .)?(?:svtplay|oppetarkiv) \ .se/video/(?P<id>[0-9]+) '
_TEST = {
_TEST S = [ {
' url ' : ' http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2 ' ,
' url ' : ' http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2 ' ,
' md5 ' : ' 2b6704fe4a28801e1a098bbf3c5ac611 ' ,
' md5 ' : ' 2b6704fe4a28801e1a098bbf3c5ac611 ' ,
' info_dict ' : {
' info_dict ' : {
@ -121,25 +137,47 @@ class SVTPlayIE(SVTBaseIE):
} ]
} ]
} ,
} ,
} ,
} ,
}
} , {
# geo restricted to Sweden
def _get_video_info ( self , info ) :
' url ' : ' http://www.oppetarkiv.se/video/5219710/trollflojten ' ,
return info [ ' context ' ] [ ' dispatcher ' ] [ ' stores ' ] [ ' VideoTitlePageStore ' ] [ ' data ' ] [ ' video ' ]
' only_matching ' : True ,
} ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
data = self . _parse_json ( self . _search_regex (
data = self . _parse_json (
r ' root \ [ " __svtplay " \ ] \ s*= \ s*([^;]+); ' , webpage , ' embedded data ' ) , video_id )
self . _search_regex (
r ' root \ [ " __svtplay " \ ] \ s*= \ s*([^;]+); ' ,
webpage , ' embedded data ' , default = ' {} ' ) ,
video_id , fatal = False )
thumbnail = self . _og_search_thumbnail ( webpage )
thumbnail = self . _og_search_thumbnail ( webpage )
info_dict = self . _extract_video ( data , video_id )
if data :
video_info = try_get (
data , lambda x : x [ ' context ' ] [ ' dispatcher ' ] [ ' stores ' ] [ ' VideoTitlePageStore ' ] [ ' data ' ] [ ' video ' ] ,
dict )
if video_info :
info_dict = self . _extract_video ( video_info , video_id )
info_dict . update ( {
info_dict . update ( {
' title ' : data [ ' context ' ] [ ' dispatcher ' ] [ ' stores ' ] [ ' MetaStore ' ] [ ' title ' ] ,
' title ' : data [ ' context ' ] [ ' dispatcher ' ] [ ' stores ' ] [ ' MetaStore ' ] [ ' title ' ] ,
' thumbnail ' : thumbnail ,
' thumbnail ' : thumbnail ,
} )
} )
return info_dict
video_id = self . _search_regex (
r ' <video[^>]+data-video-id=[ " \' ]([ \ da-zA-Z-]+) ' ,
webpage , ' video id ' , default = None )
if video_id :
data = self . _download_json (
' http://www.svt.se/videoplayer-api/video/ %s ' % video_id , video_id )
info_dict = self . _extract_video ( data , video_id )
if not info_dict . get ( ' title ' ) :
info_dict [ ' title ' ] = re . sub (
r ' \ s* \ | \ s*.+?$ ' , ' ' ,
info_dict . get ( ' episode ' ) or self . _og_search_title ( webpage ) )
return info_dict
return info_dict