@ -17,7 +17,6 @@ from ..utils import (
int_or_none ,
parse_duration ,
strip_or_none ,
unescapeHTML ,
unified_strdate ,
unified_timestamp ,
update_url_query ,
@ -122,27 +121,8 @@ class RaiBaseIE(InfoExtractor):
class RaiPlayIE ( RaiBaseIE ) :
_VALID_URL = r ' (?P< url >https?://(?:www\ .)?raiplay \ .it/.+?- (?P<id>%s ) \ . html)' % RaiBaseIE . _UUID_RE
_VALID_URL = r ' (?P< base >https?://(?:www\ .)?raiplay \ .it/.+?- ) (?P<id>%s ) \ . (?: html|json )' % RaiBaseIE . _UUID_RE
_TESTS = [ {
' url ' : ' http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter ' ,
' md5 ' : ' 340aa3b7afb54bfd14a8c11786450d76 ' ,
' info_dict ' : {
' id ' : ' e06118bb-59a9-4636-b914-498e4cfd2c66 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' La Casa Bianca ' ,
' alt_title ' : ' S2016 - Puntata del 23/10/2016 ' ,
' description ' : ' md5:a09d45890850458077d1f68bb036e0a5 ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Rai 3 ' ,
' creator ' : ' Rai 3 ' ,
' duration ' : 3278 ,
' timestamp ' : 1477764300 ,
' upload_date ' : ' 20161029 ' ,
' series ' : ' La Casa Bianca ' ,
' season ' : ' 2016 ' ,
} ,
' skip ' : ' This content is not available ' ,
} , {
' url ' : ' http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html ' ,
' md5 ' : ' 8970abf8caf8aef4696e7b1f2adfc696 ' ,
' info_dict ' : {
@ -166,10 +146,11 @@ class RaiPlayIE(RaiBaseIE):
} ]
def _real_extract ( self , url ) :
url , video_id = re . match ( self . _VALID_URL , url ) . groups ( )
mobj = re . match ( self . _VALID_URL , url )
base , video_id , = mobj . group ( ' base ' , ' id ' )
media = self . _download_json (
url . replace ( ' .html ' , ' .json ' ) , video_id , ' Downloading video JSON ' )
' %s %s .json ' % ( base , video_id ) , video_id , ' Downloading video JSON ' )
title = media [ ' name ' ]
@ -219,7 +200,7 @@ class RaiPlayIE(RaiBaseIE):
class RaiPlayLiveIE ( RaiBaseIE ) :
_VALID_URL = r ' https?://(?:www\ .)?raiplay \ .it/dirette/(?P<id>[^/?#&]+ )'
_VALID_URL = r ' (?P<base> https?://(?:www\ .)?raiplay \ .it/dirette/(?P<id>[^/?#&]+ ) )'
_TEST = {
' url ' : ' http://www.raiplay.it/dirette/rainews24 ' ,
' info_dict ' : {
@ -227,7 +208,7 @@ class RaiPlayLiveIE(RaiBaseIE):
' display_id ' : ' rainews24 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' re:^Diretta di Rai News 24 [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $ ' ,
' description ' : ' md5: 6eca31500550f9376819f174e5644754 ' ,
' description ' : ' md5: 4d00bcf6dc98b27c6ec480de329d1497 ' ,
' uploader ' : ' Rai News 24 ' ,
' creator ' : ' Rai News 24 ' ,
' is_live ' : True ,
@ -238,53 +219,75 @@ class RaiPlayLiveIE(RaiBaseIE):
}
def _real_extract(self, url):
    """Extract the live stream behind a RaiPlay ``/dirette/<channel>`` URL.

    The channel metadata is read from the JSON document found at the
    page URL with ``.json`` appended, instead of being scraped from the
    HTML page.

    :param url: URL matched by ``_VALID_URL``; the pattern must expose the
        named groups ``base`` (the page URL) and ``id`` (the channel slug).
    :returns: an info dict holding the metadata fields built below, merged
        with whatever ``_extract_relinker_info`` returned (at least
        ``formats``; optionally ``is_live``).
    :raises KeyError: if the channel JSON lacks ``name``, ``video``, ``id``
        or ``video['content_url']`` -- these are required for extraction.
    """
    mobj = re.match(self._VALID_URL, url)
    base, display_id = mobj.group('base', 'id')

    media = self._download_json(
        '%s.json' % base, display_id, 'Downloading channel JSON')

    title = media['name']
    video = media['video']
    # The JSON id carries a "ContentItem-" prefix; only the bare id is kept.
    video_id = media['id'].replace('ContentItem-', '')

    relinker_info = self._extract_relinker_info(
        video['content_url'], video_id)
    self._sort_formats(relinker_info['formats'])

    info = {
        'id': video_id,
        'display_id': display_id,
        # Only streams flagged as live get their title passed through
        # self._live_title(); anything else keeps the plain title.
        'title': self._live_title(title) if relinker_info.get(
            'is_live') else title,
        'description': media.get('description'),
        'uploader': strip_or_none(media.get('channel')),
        'creator': strip_or_none(media.get('editor')),
    }

    # Relinker data (formats, is_live, ...) overrides/extends the metadata.
    info.update(relinker_info)
    return info
class RaiPlayPlaylistIE(InfoExtractor):
    """Extractor turning a RaiPlay programme page into a playlist.

    Every item of every content set referenced by the programme JSON is
    delegated to ``RaiPlayIE`` through ``url_result``.
    """

    # ``base`` captures the programme URL up to (not including) any trailing
    # slash, query or fragment so the JSON endpoints can be derived from it.
    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
            'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Build the playlist for the programme at *url*.

        :param url: URL matched by ``_VALID_URL``.
        :returns: the result of ``self.playlist_result`` for the collected
            entries, the programme slug, its ``name`` and its
            ``program_info.description`` (``None`` when absent or empty).
        """
        mobj = re.match(self._VALID_URL, url)
        base, playlist_id = mobj.group('base', 'id')

        media = self._download_json(
            '%s.json' % base, playlist_id, 'Downloading program JSON')

        title = media.get('name')
        # ``or {}`` / ``or None`` keep the original contract: description is
        # None unless program_info carries a non-empty description.
        program_info = media.get('program_info') or {}
        description = program_info.get('description') or None

        entries = []
        # ``.get(key) or []`` also covers keys present with a JSON null,
        # which ``.get(key, [])`` would hand to the for loop as None.
        for block in media.get('blocks') or []:
            for content_set in block.get('sets') or []:
                set_id = content_set.get('id')
                if not set_id:
                    continue
                # Best effort: a content set that fails to download is
                # skipped rather than aborting the whole playlist.
                medias = self._download_json(
                    '%s/%s.json' % (base, set_id),
                    set_id, 'Downloading content set JSON', fatal=False)
                if not medias:
                    continue
                for item in medias.get('items') or []:
                    path_id = item.get('path_id')
                    if not path_id:
                        continue
                    video_url = urljoin(url, path_id)
                    entries.append(self.url_result(
                        video_url, ie=RaiPlayIE.ie_key(),
                        video_id=RaiPlayIE._match_id(video_url)))

        return self.playlist_result(entries, playlist_id, title, description)
@ -329,19 +332,6 @@ class RaiIE(RaiBaseIE):
' duration ' : 2214 ,
' upload_date ' : ' 20161103 ' ,
}
} , {
# drawMediaRaiTV(...)
' url ' : ' http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html ' ,
' md5 ' : ' 2dd727e61114e1ee9c47f0da6914e178 ' ,
' info_dict ' : {
' id ' : ' 59d69d28-6bb6-409d-a4b5-ed44096560af ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Il pacco ' ,
' description ' : ' md5:4b1afae1364115ce5d78ed83cd2e5b3a ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' upload_date ' : ' 20141221 ' ,
} ,
' skip ' : ' This content is not available ' ,
} , {
# initEdizione('ContentItem-...'
' url ' : ' http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined ' ,
@ -353,18 +343,6 @@ class RaiIE(RaiBaseIE):
' upload_date ' : ' 20170401 ' ,
} ,
' skip ' : ' Changes daily ' ,
} , {
# HDS live stream with only relinker URL
' url ' : ' http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews ' ,
' info_dict ' : {
' id ' : ' 1912dbbf-3f96-44c3-b4cf-523681fbacbc ' ,
' ext ' : ' flv ' ,
' title ' : ' EuroNews ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' This content is available only in Italy ' ,
} , {
# HLS live stream with ContentItem in og:url
' url ' : ' http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html ' ,