@ -10,11 +10,62 @@ from ..utils import (
unified_strdate ,
OnDemandPagedList ,
xpath_text ,
determine_ext ,
qualities ,
float_or_none ,
)
def extract_from_xml_url ( ie , video_id , xml_url ) :
doc = ie . _download_xml (
class ZDFIE ( InfoExtractor ) :
# URL pattern: accepts the `zdf:` / `zdf:video:` prefixes or a
# www.zdf.de/ZDFmediathek "beitrag" URL; the run of digits that follows is
# captured as the named group `id`. A trailing path segment and query string
# are tolerated.
_VALID_URL = r ' (?:zdf:|zdf:video:|https?://www \ .zdf \ .de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?: \ ?.*)? '
# Sample URL with the metadata expected for it. The entry carries a `skip`
# reason, see the message on its last key.
_TESTS = [ {
' url ' : ' http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt ' ,
' info_dict ' : {
' id ' : ' 2037704 ' ,
' ext ' : ' webm ' ,
' title ' : ' ZDFspezial - Ende des Machtpokers ' ,
' description ' : ' Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial " Ende des Machtpokers - Große Koalition für Deutschland " . ' ,
' duration ' : 1022 ,
' uploader ' : ' spezial ' ,
' uploader_id ' : ' 225948 ' ,
' upload_date ' : ' 20131127 ' ,
} ,
' skip ' : ' Videos on ZDF.de are depublicised in short order ' ,
} ]
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
    """Build format dicts from the ``<video>`` entries of a SMIL document.

    Connection parameters are not stored on the ``<video>`` elements
    themselves: each one names a ``<paramGroup>`` (declared under
    ``./head``) whose ``<param>`` children supply ``protocols`` (a
    comma-separated list), ``host`` and ``app``. One format is emitted per
    (video, protocol) pair.

    ``smil_url``, ``video_id``, ``f4m_params`` and ``transform_rtmp_url``
    are accepted but not used here (presumably kept so the signature stays
    compatible with the base-class hook -- confirm against InfoExtractor).

    Returns the list of format dicts after passing it through
    ``self._sort_formats``.
    """
    # Index every paramGroup by its xml:id so <video> elements can refer to it.
    xml_id_attr = self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')
    param_groups = {}
    for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
        param_groups[param_group.attrib.get(xml_id_attr)] = dict(
            (param.get('name'), param.get('value')) for param in param_group)

    formats = []
    for video in smil.findall(self._xpath_ns('.//video', namespace)):
        src = video.get('src')
        if not src:
            continue
        # A missing/unknown paramGroup or an incomplete one cannot yield a
        # usable format; skip the entry instead of raising KeyError.
        params = param_groups.get(video.get('paramGroup'))
        if not params:
            continue
        protocols = params.get('protocols')
        host = params.get('host')
        app = params.get('app')
        if not protocols or not host or not app:
            continue
        bitrate = float_or_none(
            video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        for proto in protocols.split(','):
            # bitrate is None when neither bitrate attribute is present;
            # '%d' would raise TypeError on it, so fall back to the bare
            # protocol name for the format id.
            if bitrate is None:
                format_id = proto
            else:
                format_id = '%s-%d' % (proto, bitrate)
            formats.append({
                'url': '%s://%s' % (proto, host),
                'app': app,
                'play_path': src,
                'ext': 'flv',
                'format_id': format_id,
                'tbr': bitrate,
                'protocol': proto,
            })
    self._sort_formats(formats)
    return formats
def extract_from_xml_url ( self , video_id , xml_url ) :
doc = self . _download_xml (
xml_url , video_id ,
note = ' Downloading video info ' ,
errnote = ' Failed to download video info ' )
@ -26,20 +77,71 @@ def extract_from_xml_url(ie, video_id, xml_url):
uploader_id = xpath_text ( doc , ' .//details/originChannelId ' , ' uploader id ' )
upload_date = unified_strdate ( xpath_text ( doc , ' .//details/airtime ' , ' upload date ' ) )
def xml_to_format ( fnode ) :
def xml_to_thumbnails(fnode):
    """Convert teaser image nodes into a list of thumbnail dicts.

    ``fnode`` is an iterable of XML elements. Elements without text are
    skipped; every other element yields ``{'url': <element text>}``. When
    the element's ``key`` attribute has the form ``<digits>x<digits>``, the
    two numbers are added as integer ``width`` and ``height``.
    """
    thumbnails = []
    for node in fnode:
        thumbnail_url = node.text
        if not thumbnail_url:
            continue
        thumbnail = {
            'url': thumbnail_url,
        }
        # The pattern must not carry any padding: a literal space in front
        # of '^' can never match, which would silently drop the dimensions.
        m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib.get('key', ''))
        if m:
            thumbnail['width'] = int(m.group(1))
            thumbnail['height'] = int(m.group(2))
        thumbnails.append(thumbnail)
    return thumbnails
thumbnails = xml_to_thumbnails ( doc . findall ( ' .//teaserimages/teaserimage ' ) )
format_nodes = doc . findall ( ' .//formitaeten/formitaet ' )
quality = qualities ( [ ' veryhigh ' , ' high ' , ' med ' , ' low ' ] )
def get_quality ( elem ) :
return quality ( xpath_text ( elem , ' quality ' ) )
format_nodes . sort ( key = get_quality )
format_ids = [ ]
formats = [ ]
for fnode in format_nodes :
video_url = fnode . find ( ' url ' ) . text
is_available = ' http://www.metafilegenerator ' not in video_url
if not is_available :
continue
format_id = fnode . attrib [ ' basetype ' ]
quality = xpath_text ( fnode , ' ./quality ' , ' quality ' )
format_m = re . match ( r ''' (?x)
( ? P < vcodec > [ ^ _ ] + ) _ ( ? P < acodec > [ ^ _ ] + ) _ ( ? P < container > [ ^ _ ] + ) _
( ? P < proto > [ ^ _ ] + ) _ ( ? P < index > [ ^ _ ] + ) _ ( ? P < indexproto > [ ^ _ ] + )
''' , format_id)
ext = format_m . group ( ' container ' )
ext = determine_ext ( video_url , None ) or format_m . group ( ' container ' )
if ext not in ( ' smil ' , ' f4m ' , ' m3u8 ' ) :
format_id = format_id + ' - ' + quality
if format_id in format_ids :
continue
if ext == ' meta ' :
continue
elif ext == ' smil ' :
smil_formats = self . _extract_smil_formats (
video_url , video_id , fatal = False )
if smil_formats :
formats . extend ( smil_formats )
elif ext == ' m3u8 ' :
m3u8_formats = self . _extract_m3u8_formats (
video_url , video_id , ' mp4 ' , m3u8_id = ' hls ' , fatal = False )
if m3u8_formats :
formats . extend ( m3u8_formats )
elif ext == ' f4m ' :
f4m_formats = self . _extract_f4m_formats (
video_url , video_id , f4m_id = ' hds ' , fatal = False )
if f4m_formats :
formats . extend ( f4m_formats )
else :
proto = format_m . group ( ' proto ' ) . lower ( )
quality = xpath_text ( fnode , ' ./quality ' , ' quality ' )
abr = int_or_none ( xpath_text ( fnode , ' ./audioBitrate ' , ' abr ' ) , 1000 )
vbr = int_or_none ( xpath_text ( fnode , ' ./videoBitrate ' , ' vbr ' ) , 1000 )
@ -52,8 +154,8 @@ def extract_from_xml_url(ie, video_id, xml_url):
if not format_note :
format_note = None
return {
' format_id ' : format_id + ' - ' + quality ,
formats . append ( {
' format_id ' : format_id ,
' url ' : video_url ,
' ext ' : ext ,
' acodec ' : format_m . group ( ' acodec ' ) ,
@ -66,32 +168,10 @@ def extract_from_xml_url(ie, video_id, xml_url):
' format_note ' : format_note ,
' protocol ' : proto ,
' _available ' : is_available ,
}
def xml_to_thumbnails(fnode):
    """Convert teaser image nodes into a list of thumbnail dicts.

    ``fnode`` is an iterable of XML elements. Elements without text are
    skipped; every other element yields ``{'url': <element text>}``. When
    the element's ``key`` attribute has the form ``<digits>x<digits>``, the
    two numbers are added as integer ``width`` and ``height``.
    """
    thumbnails = []
    for node in fnode:
        thumbnail_url = node.text
        if not thumbnail_url:
            continue
        thumbnail = {
            'url': thumbnail_url,
        }
        # The pattern must not carry any padding: a literal space in front
        # of '^' can never match, which would silently drop the dimensions.
        m = re.match(r'^([0-9]+)x([0-9]+)$', node.attrib.get('key', ''))
        if m:
            thumbnail['width'] = int(m.group(1))
            thumbnail['height'] = int(m.group(2))
        thumbnails.append(thumbnail)
    return thumbnails
thumbnails = xml_to_thumbnails ( doc . findall ( ' .//teaserimages/teaserimage ' ) )
} )
format_ids . append ( format_id )
format_nodes = doc . findall ( ' .//formitaeten/formitaet ' )
formats = list ( filter (
lambda f : f [ ' _available ' ] ,
map ( xml_to_format , format_nodes ) ) )
ie . _sort_formats ( formats )
self . _sort_formats ( formats )
return {
' id ' : video_id ,
@ -105,29 +185,10 @@ def extract_from_xml_url(ie, video_id, xml_url):
' formats ' : formats ,
}
class ZDFIE ( InfoExtractor ) :
# URL pattern: accepts the `zdf:` / `zdf:video:` prefixes or a
# www.zdf.de/ZDFmediathek "beitrag" URL; the run of digits that follows is
# captured as the named group `id`. A trailing path segment and query string
# are tolerated.
_VALID_URL = r ' (?:zdf:|zdf:video:|https?://www \ .zdf \ .de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?: \ ?.*)? '
# Sample URL with the metadata expected for it. The entry carries a `skip`
# reason, see the message on its last key.
_TEST = {
' url ' : ' http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt ' ,
' info_dict ' : {
' id ' : ' 2037704 ' ,
' ext ' : ' webm ' ,
' title ' : ' ZDFspezial - Ende des Machtpokers ' ,
' description ' : ' Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial " Ende des Machtpokers - Große Koalition für Deutschland " . ' ,
' duration ' : 1022 ,
' uploader ' : ' spezial ' ,
' uploader_id ' : ' 225948 ' ,
' upload_date ' : ' 20131127 ' ,
} ,
' skip ' : ' Videos on ZDF.de are depublicised in short order ' ,
}
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
xml_url = ' http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s ' % video_id
return extract_from_xml_url ( self , video_id , xml_url )
return self . extract_from_xml_url ( video_id , xml_url )
class ZDFChannelIE ( InfoExtractor ) :