@ -8,13 +8,14 @@ from ..compat import compat_str
from . . utils import (
determine_ext ,
ExtractorError ,
extract_attributes ,
float_or_none ,
int_or_none ,
merge_dicts ,
NO_DEFAULT ,
orderedSet ,
parse_codecs ,
qualities ,
str_or_none ,
try_get ,
unified_timestamp ,
update_url_query ,
@ -57,28 +58,39 @@ class ZDFBaseIE(InfoExtractor):
format_urls . add ( format_url )
mime_type = meta . get ( ' mimeType ' )
ext = determine_ext ( format_url )
join_nonempty = lambda s , l : s . join ( filter ( None , l ) )
meta_map = lambda t : map ( lambda x : str_or_none ( meta . get ( x ) ) , t )
if mime_type == ' application/x-mpegURL ' or ext == ' m3u8 ' :
formats . extend ( self . _extract_m3u8_formats (
new_formats = self . _extract_m3u8_formats (
format_url , video_id , ' mp4 ' , m3u8_id = ' hls ' ,
entry_protocol = ' m3u8_native ' , fatal = False ) )
entry_protocol = ' m3u8_native ' , fatal = False )
elif mime_type == ' application/f4m+xml ' or ext == ' f4m ' :
formats. extend ( self . _extract_f4m_formats (
update_url_query ( format_url , { ' hdcore ' : ' 3.7.0 ' } ) , video_id , f4m_id = ' hds ' , fatal = False ) )
new_formats = self . _extract_f4m_formats (
update_url_query ( format_url , { ' hdcore ' : ' 3.7.0 ' } ) , video_id , f4m_id = ' hds ' , fatal = False )
else :
f = parse_codecs ( meta . get ( ' mimeCodec ' ) )
if not f :
data = meta . get ( ' type ' , ' ' ) . split ( ' _ ' )
if try_get ( data , lambda x : x [ 2 ] ) == ext :
f = dict ( zip ( ( ' vcodec ' , ' acodec ' ) , data [ 1 ] ) )
format_id = [ ' http ' ]
for p in ( meta . get ( ' type ' ) , meta . get ( ' quality ' ) ) :
if p and isinstance ( p , compat_str ) :
format_id . append ( p )
format_id . extend ( join_nonempty ( ' - ' , meta_map ( ( ' type ' , ' quality ' ) ) ) )
f . update ( {
' url ' : format_url ,
' format_id ' : ' - ' . join ( format_id ) ,
' format_note ' : meta . get ( ' quality ' ) ,
' language ' : meta . get ( ' language ' ) ,
' quality ' : qualities ( self . _QUALITIES ) ( meta . get ( ' quality ' ) ) ,
' preference ' : - 10 ,
' tbr ' : int_or_none ( self . _search_regex ( r ' _( \ d+)k_ ' , format_url , ' tbr ' , default = None ) )
} )
formats . append ( f )
new_formats = [ f ]
formats . extend ( merge_dicts ( f , {
' format_note ' : join_nonempty ( ' , ' , meta_map ( ( ' quality ' , ' class ' ) ) ) ,
' language ' : meta . get ( ' language ' ) ,
' language_preference ' : 10 if meta . get ( ' class ' ) == ' main ' else - 10 if meta . get ( ' class ' ) == ' ad ' else - 1 ,
' quality ' : qualities ( self . _QUALITIES ) ( meta . get ( ' quality ' ) ) ,
} ) for f in new_formats )
def _extract_ptmd ( self , ptmd_url , video_id , api_token , referrer ) :
ptmd = self . _call_api (
@ -107,6 +119,7 @@ class ZDFBaseIE(InfoExtractor):
' type ' : f . get ( ' type ' ) ,
' mimeType ' : f . get ( ' mimeType ' ) ,
' quality ' : quality . get ( ' quality ' ) ,
' class ' : track . get ( ' class ' ) ,
' language ' : track . get ( ' language ' ) ,
} )
self . _sort_formats ( formats )
@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE):
' duration ' : 2615 ,
' timestamp ' : 1465021200 ,
' upload_date ' : ' 20160604 ' ,
' thumbnail ' : ' https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806 ' ,
} ,
} , {
' url ' : ' https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html ' ,
' md5 ' : ' 1b93bdec7d02fc0b703c5e7687461628 ' ,
' info_dict ' : {
' ext ' : ' mp4 ' ,
' id ' : ' video_funk_1770473 ' ,
' duration ' : 1278 ,
' description ' : ' Die Neue an der Schule verdreht Ismail den Kopf. ' ,
' title ' : ' Alles ist verzaubert ' ,
' timestamp ' : 1635520560 ,
' upload_date ' : ' 20211029 ' ,
' thumbnail ' : ' https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799 ' ,
} ,
} , {
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
' timestamp ' : 1641355200 ,
' upload_date ' : ' 20220105 ' ,
} ,
' skip ' : ' No longer available " Diese Seite wurde leider nicht gefunden " '
} , {
' url ' : ' https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html ' ,
' info_dict ' : {
' id ' : ' 191205_1800_sendung_sok8 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Das Geld anderer Leute ' ,
' description ' : ' md5:cb6f660850dc5eb7d1ab776ea094959d ' ,
' duration ' : 2581.0 ,
' timestamp ' : 1654790700 ,
' upload_date ' : ' 20220609 ' ,
' thumbnail ' : ' https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350 ' ,
} ,
} ]
def _extract_entry ( self , url , player , content , video_id ) :
@ -265,15 +305,16 @@ class ZDFIE(ZDFBaseIE):
' https://zdf-cdn.live.cellular.de/mediathekV2/document/ %s ' % video_id ,
video_id )
document = video [ ' document ' ]
title = document [ ' titel ' ]
content_id = document [ ' basename ' ]
formats = [ ]
format_urls = set ( )
for f in document [ ' formitaeten ' ] :
self . _extract_format ( content_id , formats , format_urls , f )
formitaeten = try_get ( video , lambda x : x [ ' document ' ] [ ' formitaeten ' ] , list )
document = formitaeten and video [ ' document ' ]
if formitaeten :
title = document [ ' titel ' ]
content_id = document [ ' basename ' ]
format_urls = set ( )
for f in formitaeten or [ ] :
self . _extract_format ( content_id , formats , format_urls , f )
self . _sort_formats ( formats )
thumbnails = [ ]
@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE):
' url ' : ' https://www.zdf.de/sport/das-aktuelle-sportstudio ' ,
' info_dict ' : {
' id ' : ' das-aktuelle-sportstudio ' ,
' title ' : ' das aktuelle sportstudio | ZDF ' ,
' title ' : ' das aktuelle sportstudio ' ,
} ,
' playlist_mincount ' : 23 ,
' playlist_mincount ' : 18 ,
} , {
' url ' : ' https://www.zdf.de/dokumentation/planet-e ' ,
' info_dict ' : {
@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE):
' title ' : ' planet e. ' ,
} ,
' playlist_mincount ' : 50 ,
} , {
' url ' : ' https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest ' ,
' info_dict ' : {
' id ' : ' aktenzeichen-xy-ungeloest ' ,
' title ' : ' Aktenzeichen XY... ungelöst ' ,
' entries ' : " lambda x: not any( ' xy580-fall1-kindermoerder-gesucht-100 ' in e[ ' url ' ] for e in x) " ,
} ,
' playlist_mincount ' : 2 ,
} , {
' url ' : ' https://www.zdf.de/filme/taunuskrimi/ ' ,
' only_matching ' : True ,
@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE):
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``ZDFIE`` accepts is left to ``ZDFIE``; for every other
    URL the decision is delegated to the inherited ``suitable`` check.
    """
    # ZDFIE takes precedence over this extractor for URLs it accepts.
    if ZDFIE.suitable(url):
        return False
    return super(ZDFChannelIE, cls).suitable(url)
def _og_search_title(self, webpage, fatal=False):
    """Return the parent class's OpenGraph title without a trailing ZDF suffix.

    The title is obtained from the inherited ``_og_search_title`` (called
    with the given ``fatal`` flag).  A trailing ``- ZDF``, ``| ZDF``,
    ``- ZDFmediathek`` or ``| ZDFmediathek`` part, together with the
    whitespace around the separator, is cut off.  Returns ``None`` when
    there is no title or nothing is left after trimming.
    """
    og_title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
    if not og_title:
        return None
    # Everything before the first (end-anchored) suffix match is the title.
    trimmed = re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', og_title)[0]
    return trimmed or None
def _real_extract(self, url):
    """Extract a channel page as a playlist (or as its first video).

    The channel page is downloaded and scanned for ``<div>`` tags that
    carry both a ``data-plusbar-id`` and a ``data-plusbar-url`` attribute
    whose URL matches ``ZDFIE._VALID_URL``.

    * With the ``noplaylist`` parameter set, the first such URL is
      returned as a single ``ZDFIE`` url result, if there is one.
    * Otherwise (or when no such URL exists) a playlist is built from all
      remaining matches, skipping items whose corresponding ``<a>`` tag is
      marked ``data-target-video-type="novideo"``.

    The playlist id is the id matched from ``url`` and the title comes
    from ``self._og_search_title``.
    """
    channel_id = self._match_id(url)
    webpage = self._download_webpage(url, channel_id)

    # Lazy iterator: matches consumed by the --no-playlist branch below are
    # not seen again by the playlist generator at the end.
    matches = re.finditer(
        r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
        webpage)

    if self._downloader.params.get('noplaylist', False):
        entry = next(
            (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
            None)
        if entry:
            # Only announce the single-video download once we know there
            # actually is a video to return.
            self.to_screen('Downloading just the main video because of --no-playlist')
            return entry
    else:
        self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))

    def check_video(m):
        """Return False only if the item's <a> tag is flagged as 'novideo'."""
        # Find the <a ...> tag whose data-target-id equals this plusbar id;
        # when none is found the default '' is passed on instead.
        v_ref = self._search_regex(
            r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
            webpage, 'check id', default='')
        # NOTE(review): assumes extract_attributes('') yields an empty
        # mapping, so items without a matching tag are kept - confirm.
        v_ref = extract_attributes(v_ref)
        return v_ref.get('data-target-video-type') != 'novideo'

    return self.playlist_from_matches(
        (m.group('url') for m in matches if check_video(m)),
        channel_id, self._og_search_title(webpage, fatal=False))