@ -6,6 +6,7 @@ import functools
import re
from . common import InfoExtractor
from . youtube import YoutubeIE
from . . utils import (
clean_html ,
determine_ext ,
@ -14,10 +15,8 @@ from ..utils import (
int_or_none ,
merge_dicts ,
mimetype2ext ,
orderedSet ,
parse_age_limit ,
parse_iso8601 ,
remove_end ,
strip_jsonp ,
txt_or_none ,
unified_strdate ,
@ -305,11 +304,90 @@ class ORFPodcastIE(ORFRadioBase):
} , self . _extract_podcast_upload ( data ) , rev = True )
class ORFIPTVIE ( InfoExtractor ) :
class ORFIPTVBase(InfoExtractor):
    """Shared video extraction for ORF story pages embedding bits.orf.at videos.

    Subclasses override `_TITLE_STRIP_RE` with the regex that should be
    removed from the page's OpenGraph title.
    """
    # Pattern removed from the og:title; the empty default removes nothing
    _TITLE_STRIP_RE = ''

    def _extract_video(self, video_id, webpage, fatal=False):
        """Return the info dict for one embedded video, or None.

        video_id: numeric id taken from the page's data-video(id) attribute
        webpage:  HTML of the story page, used for title, description and
                  upload date
        fatal:    when false, a video without any usable source or format
                  yields None instead of an error

        Raises ExtractorError if `fatal` is set and no source with a load
        balancer URL exists (with no formats, `_sort_formats` is presumably
        what raises - confirm against InfoExtractor).
        """
        data = self._download_json(
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
            video_id)[0]

        # first of the 'default'/'q8c' sources having a load balancer URL
        video = traverse_obj(data, (
            'sources', ('default', 'q8c'),
            T(lambda x: x if x['loadBalancerUrl'] else None),
            any))
        if not video:
            # neither source was usable: subscripting None would raise
            # a TypeError regardless of `fatal`
            if fatal:
                raise ExtractorError(
                    'Unable to find a video source for %s' % video_id)
            return

        load_balancer_url = video['loadBalancerUrl']

        try:
            rendition = self._download_json(
                load_balancer_url, video_id, transform_source=strip_jsonp)
        except ExtractorError:
            rendition = None

        if not rendition:
            # no JSONP answer: derive a DASH manifest URL from the
            # load balancer URL instead
            rendition = {
                'redirect': {
                    'smil': re.sub(
                        r'(/)jsonp(/.+\.)mp4$', r'\1dash\2smil/manifest.mpd',
                        load_balancer_url),
                },
            }

        # properties of the source, applied to the plain 'rtmp' format
        f = traverse_obj(video, {
            'abr': ('audioBitrate', T(int_or_none)),
            'vbr': ('bitrate', T(int_or_none)),
            'fps': ('videoFps', T(int_or_none)),
            'width': ('videoWidth', T(int_or_none)),
            'height': ('videoHeight', T(int_or_none)),
        })

        formats = []
        for format_id, format_url in traverse_obj(rendition, (
                'redirect', T(dict.items), Ellipsis)):
            if format_id == 'rtmp':
                ff = f.copy()
                ff.update({
                    'url': format_url,
                    'format_id': format_id,
                })
                formats.append(ff)
                continue
            # manifest type is decided by the URL's extension;
            # anything unrecognised is ignored
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id,
                    entry_protocol='m3u8_native'))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id=format_id))

        if formats or fatal:
            self._sort_formats(formats)
        else:
            return

        # NOTE(review): rev=True presumably changes which dict takes
        # precedence in merge_dicts - confirm against utils
        return merge_dicts({
            'id': video_id,
            'title': re.sub(self._TITLE_STRIP_RE, '', self._og_search_title(webpage)),
            'description': self._og_search_description(webpage),
            'upload_date': unified_strdate(self._html_search_meta(
                'dc.date', webpage, 'upload date', fatal=False)),
            'formats': formats,
        }, traverse_obj(data, {
            'duration': ('duration', T(k_float_or_none)),
            'thumbnail': ('sources', 'default', 'preview', T(url_or_none)),
        }), rev=True)
class ORFIPTVIE ( ORFIPTVBase ) :
IE_NAME = ' orf:iptv '
IE_DESC = ' iptv.ORF.at '
_WORKING = False # URLs redirect to orf.at/
_VALID_URL = r ' https?://iptv \ .orf \ .at/(?:#/)?stories/(?P<id> \ d+) '
_TITLE_STRIP_RE = r ' \ s+- \ s+iptv \ .ORF \ .at \ S*$ '
_TEST = {
' url ' : ' http://iptv.orf.at/stories/2275236/ ' ,
@ -334,74 +412,32 @@ class ORFIPTVIE(InfoExtractor):
video_id = self . _search_regex (
r ' data-video(?:id)?= " ( \ d+) " ' , webpage , ' video id ' )
data = self . _download_json (
' http://bits.orf.at/filehandler/static-api/json/current/data.json?file= %s ' % video_id ,
video_id ) [ 0 ]
duration = float_or_none ( data [ ' duration ' ] , 1000 )
video = data [ ' sources ' ] [ ' default ' ]
load_balancer_url = video [ ' loadBalancerUrl ' ]
abr = int_or_none ( video . get ( ' audioBitrate ' ) )
vbr = int_or_none ( video . get ( ' bitrate ' ) )
fps = int_or_none ( video . get ( ' videoFps ' ) )
width = int_or_none ( video . get ( ' videoWidth ' ) )
height = int_or_none ( video . get ( ' videoHeight ' ) )
thumbnail = video . get ( ' preview ' )
rendition = self . _download_json (
load_balancer_url , video_id , transform_source = strip_jsonp )
f = {
' abr ' : abr ,
' vbr ' : vbr ,
' fps ' : fps ,
' width ' : width ,
' height ' : height ,
}
formats = [ ]
for format_id , format_url in rendition [ ' redirect ' ] . items ( ) :
if format_id == ' rtmp ' :
ff = f . copy ( )
ff . update ( {
' url ' : format_url ,
' format_id ' : format_id ,
} )
formats . append ( ff )
elif determine_ext ( format_url ) == ' f4m ' :
formats . extend ( self . _extract_f4m_formats (
format_url , video_id , f4m_id = format_id ) )
elif determine_ext ( format_url ) == ' m3u8 ' :
formats . extend ( self . _extract_m3u8_formats (
format_url , video_id , ' mp4 ' , m3u8_id = format_id ) )
else :
continue
self . _sort_formats ( formats )
return self . _extract_video ( video_id , webpage )
title = remove_end ( self . _og_search_title ( webpage ) , ' - iptv.ORF.at ' )
description = self . _og_search_description ( webpage )
upload_date = unified_strdate ( self . _html_search_meta (
' dc.date ' , webpage , ' upload date ' ) )
return {
' id ' : video_id ,
' title ' : title ,
' description ' : description ,
' duration ' : duration ,
' thumbnail ' : thumbnail ,
' upload_date ' : upload_date ,
' formats ' : formats ,
}
class ORFFM4StoryIE ( InfoExtractor ) :
class ORFFM4StoryIE ( ORFIPTVBase ) :
IE_NAME = ' orf:fm4:story '
IE_DESC = ' fm4.orf.at stories '
_VALID_URL = r ' https?://fm4 \ .orf \ .at/stories/(?P<id> \ d+) '
_TITLE_STRIP_RE = r ' \ s+- \ s+fm4 \ .ORF \ .at \ s*$ '
_TEST = {
_TESTS = [ {
' url ' : ' https://fm4.orf.at/stories/3041554/ ' ,
' add_ie ' : [ ' Youtube ' ] ,
' info_dict ' : {
' id ' : ' 3041554 ' ,
' title ' : ' Is The EU Green Deal In Mortal Danger? ' ,
} ,
' playlist_count ' : 4 ,
' params ' : {
' format ' : ' bestvideo ' ,
} ,
} , {
' url ' : ' http://fm4.orf.at/stories/2865738/ ' ,
' info_dict ' : {
' id ' : ' 2865738 ' ,
' title ' : ' Manu Delago und Inner Tongue live ' ,
} ,
' playlist ' : [ {
' md5 ' : ' e1c2c706c45c7b34cf478bbf409907ca ' ,
' info_dict ' : {
@ -418,86 +454,49 @@ class ORFFM4StoryIE(InfoExtractor):
' info_dict ' : {
' id ' : ' 547798 ' ,
' ext ' : ' flv ' ,
' title ' : ' Manu Delago und Inner Tongue live (2)' ,
' title ' : ' Manu Delago und Inner Tongue https://vod-ww.mdn.ors.at/cms-worldwide_episodes_nas/_definst_/nas/cms-worldwide_episodes/online/14228823_0005.smil/chunklist_b992000_vo.m3u8 live (2)' ,
' duration ' : 1504.08 ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' upload_date ' : ' 20170913 ' ,
' description ' : ' Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video. ' ,
} ,
} ] ,
}
' skip ' : ' Videos gone ' ,
} ]
def _real_extract(self, url):
    """Return a playlist of the videos embedded in an FM4 story page.

    Entries are the ORF-hosted videos referenced by data-video(id)
    attributes, followed by any YouTube embeds. The playlist title is the
    page's OpenGraph title with `_TITLE_STRIP_RE` removed, or None when
    the page has no such title.
    """
    story_id = self._match_id(url)
    webpage = self._download_webpage(url, story_id)

    entries = []
    seen_ids = set()
    for idx, video_id in enumerate(re.findall(r'data-video(?:id)?="(\d+)"', webpage)):
        if video_id in seen_ids:
            continue
        seen_ids.add(video_id)
        entry = self._extract_video(video_id, webpage, fatal=False)
        if not entry:
            continue

        if idx >= 1:
            # Titles are duplicates, make them unique
            entry['title'] = '%s (%d)' % (entry['title'], idx)

        entries.append(entry)

    # YouTube embeds are tracked separately from the ORF video ids
    seen_ids = set()
    for yt_id in re.findall(
            r'data-id\s*=\s*["\']([\w-]+)[^>]+\bclass\s*=\s*["\']youtube\b',
            webpage):
        if yt_id in seen_ids:
            continue
        seen_ids.add(yt_id)
        if YoutubeIE.suitable(yt_id):
            entries.append(self.url_result(yt_id, ie='Youtube', video_id=yt_id))

    # `or None` applies to the stripped title, not to the argument of
    # re.sub(): passing None there raises a TypeError when og:title is absent
    playlist_title = re.sub(
        self._TITLE_STRIP_RE, '',
        self._og_search_title(webpage, default='') or '') or None

    return self.playlist_result(entries, story_id, playlist_title)
class ORFONBase ( InfoExtractor ) :