@ -9,17 +9,19 @@ from .common import InfoExtractor
from . . utils import (
from . . utils import (
clean_html ,
clean_html ,
determine_ext ,
determine_ext ,
ExtractorError ,
float_or_none ,
float_or_none ,
int_or_none ,
int_or_none ,
merge_dicts ,
merge_dicts ,
mimetype2ext ,
orderedSet ,
orderedSet ,
parse_age_limit ,
parse_age_limit ,
parse_iso8601 ,
parse_iso8601 ,
remove_end ,
remove_end ,
str_or_none ,
strip_jsonp ,
strip_jsonp ,
txt_or_none ,
txt_or_none ,
unified_strdate ,
unified_strdate ,
update_url_query ,
url_or_none ,
url_or_none ,
)
)
from . . traversal import T , traverse_obj
from . . traversal import T , traverse_obj
@ -27,221 +29,280 @@ from ..traversal import T, traverse_obj
k_float_or_none = functools . partial ( float_or_none , scale = 1000 )
k_float_or_none = functools . partial ( float_or_none , scale = 1000 )
class ORFRadioIE ( InfoExtractor ) :
class ORFRadioBase ( InfoExtractor ) :
def _real_extract ( self , url ) :
STATION_INFO = {
mobj = re . match ( self . _VALID_URL , url )
' fm4 ' : ( ' fm4 ' , ' fm4 ' , ' orffm4 ' ) ,
show_date = mobj . group ( ' date ' )
' noe ' : ( ' noe ' , ' oe2n ' , ' orfnoe ' ) ,
show_id = mobj . group ( ' show ' )
' wien ' : ( ' wie ' , ' oe2w ' , ' orfwie ' ) ,
' burgenland ' : ( ' bgl ' , ' oe2b ' , ' orfbgl ' ) ,
' ooe ' : ( ' ooe ' , ' oe2o ' , ' orfooe ' ) ,
' steiermark ' : ( ' stm ' , ' oe2st ' , ' orfstm ' ) ,
' kaernten ' : ( ' ktn ' , ' oe2k ' , ' orfktn ' ) ,
' salzburg ' : ( ' sbg ' , ' oe2s ' , ' orfsbg ' ) ,
' tirol ' : ( ' tir ' , ' oe2t ' , ' orftir ' ) ,
' vorarlberg ' : ( ' vbg ' , ' oe2v ' , ' orfvbg ' ) ,
' oe3 ' : ( ' oe3 ' , ' oe3 ' , ' orfoe3 ' ) ,
' oe1 ' : ( ' oe1 ' , ' oe1 ' , ' orfoe1 ' ) ,
}
_ID_NAMES = ( ' id ' , ' guid ' , ' program ' )
data = self . _download_json (
@classmethod
' http://audioapi.orf.at/ %s /api/json/current/broadcast/ %s / %s '
def _get_item_id ( cls , data ) :
% ( self . _API_STATION , show_id , show_date ) , show_id )
return traverse_obj ( data , * cls . _ID_NAMES , expected_type = txt_or_none )
entries = [ ]
@classmethod
for info in data [ ' streams ' ] :
def _get_api_payload ( cls , data , expected_id , in_payload = False ) :
loop_stream_id = str_or_none ( info . get ( ' loopStreamId ' ) )
if expected_id not in traverse_obj ( data , ( ' payload ' , ) [ : 1 if in_payload else 0 ] + ( cls . _ID_NAMES , T ( txt_or_none ) ) ) :
if not loop_stream_id :
raise ExtractorError ( ' Unexpected API data result ' , video_id = expected_id )
continue
return data [ ' payload ' ]
title = str_or_none ( data . get ( ' title ' ) )
if not title :
@staticmethod
continue
def _extract_podcast_upload ( data ) :
start = int_or_none ( info . get ( ' start ' ) , scale = 1000 )
return traverse_obj ( data , {
end = int_or_none ( info . get ( ' end ' ) , scale = 1000 )
' url ' : ( ' enclosures ' , 0 , ' url ' ) ,
duration = end - start if end and start else None
' ext ' : ( ' enclosures ' , 0 , ' type ' , T ( mimetype2ext ) ) ,
entries . append ( {
' filesize ' : ( ' enclosures ' , 0 , ' length ' , T ( int_or_none ) ) ,
' id ' : loop_stream_id . replace ( ' .mp3 ' , ' ' ) ,
' title ' : ( ' title ' , T ( txt_or_none ) ) ,
' url ' : ' https://loopstream01.apa.at/?channel= %s &id= %s ' % ( self . _LOOP_STATION , loop_stream_id ) ,
' description ' : ( ' description ' , T ( clean_html ) ) ,
' title ' : title ,
' timestamp ' : ( ( ' published ' , ' postDate ' ) , T ( parse_iso8601 ) ) ,
' description ' : clean_html ( data . get ( ' subtitle ' ) ) ,
' duration ' : ( ' duration ' , T ( k_float_or_none ) ) ,
' duration ' : duration ,
' series ' : ( ' podcast ' , ' title ' ) ,
' timestamp ' : start ,
' uploader ' : ( ( ( ' podcast ' , ' author ' ) , ' station ' ) , T ( txt_or_none ) ) ,
' uploader_id ' : ( ' podcast ' , ' channel ' , T ( txt_or_none ) ) ,
} , get_all = False )
@classmethod
def _entries ( cls , data , station , item_type = None ) :
if item_type in ( ' upload ' , ' podcast-episode ' ) :
yield merge_dicts ( {
' id ' : cls . _get_item_id ( data ) ,
' ext ' : ' mp3 ' ,
' ext ' : ' mp3 ' ,
' series ' : data . get ( ' programTitle ' ) ,
' vcodec ' : ' none ' ,
} )
} , cls . _extract_podcast_upload ( data ) , rev = True )
return
loop_station = cls . STATION_INFO [ station ] [ 1 ]
for info in traverse_obj ( data , ( ( ( ' streams ' , Ellipsis ) , ' stream ' ) , T ( lambda v : v if v [ ' loopStreamId ' ] else None ) ) ) :
item_id = info [ ' loopStreamId ' ]
host = info . get ( ' host ' ) or ' loopstream01.apa.at '
yield merge_dicts ( {
' id ' : item_id . replace ( ' .mp3 ' , ' ' ) ,
' ext ' : ' mp3 ' ,
' url ' : update_url_query ( ' https:// {0} / ' . format ( host ) , {
' channel ' : loop_station ,
' id ' : item_id ,
} ) ,
' vcodec ' : ' none ' ,
# '_old_archive_ids': [make_archive_id(old_ie, video_id)],
} , traverse_obj ( data , {
' title ' : ( ' title ' , T ( txt_or_none ) ) ,
' description ' : ( ' subtitle ' , T ( clean_html ) ) ,
' uploader ' : ' station ' ,
' series ' : ( ' programTitle ' , T ( txt_or_none ) ) ,
} ) , traverse_obj ( info , {
' duration ' : ( ( ' duration ' ,
( None , T ( lambda x : x [ ' end ' ] - x [ ' start ' ] ) ) ) ,
T ( k_float_or_none ) , any ) ,
' timestamp ' : ( ( ' start ' , ' startISO ' ) , T ( parse_iso8601 ) , any ) ,
} ) )
return {
' _type ' : ' playlist ' ,
' id ' : show_id ,
' title ' : data . get ( ' title ' ) ,
' description ' : clean_html ( data . get ( ' subtitle ' ) ) ,
' entries ' : entries ,
}
class ORFRadioIE ( ORFRadioBase ) :
IE_NAME = ' orf:sound '
_STATION_RE = ' | ' . join ( map ( re . escape , ORFRadioBase . STATION_INFO . keys ( ) ) )
class ORFFM4IE ( ORFRadioIE ) :
_VALID_URL = (
IE_NAME = ' orf:fm4 '
r ' https?://sound \ .orf \ .at/radio/(?P<station> {0} )/sendung/(?P<id> \ d+)(?:/(?P<show> \ w+))? ' . format ( _STATION_RE ) ,
IE_DESC = ' radio FM4 '
r ' https?://(?P<station> {0} ) \ .orf \ .at/player/(?P<date> \ d {{ 8}})/(?P<id> \ d+) ' . format ( _STATION_RE ) ,
_VALID_URL = r ' https?://(?P<station>fm4) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show>4 \ w+) '
)
_API_STATION = ' fm4 '
_LOOP_STATION = ' fm4 '
_TEST = {
_TESTS = [ {
' url ' : ' http://fm4.orf.at/player/20170107/4CC ' ,
' url ' : ' https://sound.orf.at/radio/ooe/sendung/37802/guten-morgen-oberoesterreich-am-feiertag ' ,
' md5 ' : ' 2b0be47375432a7ef104453432a19212 ' ,
' info_dict ' : {
' id ' : ' 37802 ' ,
' title ' : ' Guten Morgen Oberösterreich am Feiertag ' ,
' description ' : ' Oberösterreichs meistgehörte regionale Frühsendung. \n Regionale Nachrichten zu jeder halben Stunde. \n Moderation: Wolfgang Lehner \n Nachrichten: Stephan Schnabl ' ,
} ,
' playlist ' : [ {
' md5 ' : ' f9ff8517dd681b642a2c900e2c9e6085 ' ,
' info_dict ' : {
' id ' : ' 2024-05-30_0559_tl_66_7DaysThu1_443862 ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Guten Morgen Oberösterreich am Feiertag ' ,
' description ' : ' Oberösterreichs meistgehörte regionale Frühsendung. \n Regionale Nachrichten zu jeder halben Stunde. \n Moderation: Wolfgang Lehner \n Nachrichten: Stephan Schnabl ' ,
' timestamp ' : 1717041587 ,
' upload_date ' : ' 20240530 ' ,
' uploader ' : ' ooe ' ,
' duration ' : 14413.0 ,
}
} ] ,
# 'skip': 'Shows from ORF Sound are only available for 30 days.'
} , {
' url ' : ' https://oe1.orf.at/player/20240531/758136 ' ,
' md5 ' : ' 2397717aaf3ae9c22a4f090ee3b8d374 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 2017-01-07_2100_tl_54_7DaysSat18_31295 ' ,
' id ' : ' 20 24-05-31_1905_tl_51_7DaysFri35_2413387 ' ,
' ext ' : ' mp3 ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Solid Steel Radioshow ' ,
' title ' : ' " Who Cares? " ' ,
' description ' : ' Die Mixshow von Coldcut und Ninja Tune. ' ,
' description ' : ' Europas größte Netzkonferenz re:publica 2024 ' ,
' duration ' : 3599 ,
' timestamp ' : 1717175100 ,
' timestamp ' : 1483819257 ,
' upload_date ' : ' 20240531 ' ,
' upload_date ' : ' 20170107 ' ,
' uploader ' : ' oe1 ' ,
' duration ' : 1500 ,
} ,
} ,
' skip ' : ' Shows from ORF radios are only available for 7 days. ' ,
# 'skip': 'Shows from ORF Sound are only available for 30 days.'
' only_matching ' : True ,
} ]
}
class ORFNOEIE ( ORFRadioIE ) :
IE_NAME = ' orf:noe '
IE_DESC = ' Radio Niederösterreich '
_VALID_URL = r ' https?://(?P<station>noe) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' noe '
_LOOP_STATION = ' oe2n '
_TEST = {
' url ' : ' https://noe.orf.at/player/20200423/NGM ' ,
' only_matching ' : True ,
}
class ORFWIEIE ( ORFRadioIE ) :
IE_NAME = ' orf:wien '
IE_DESC = ' Radio Wien '
_VALID_URL = r ' https?://(?P<station>wien) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' wie '
_LOOP_STATION = ' oe2w '
_TEST = {
' url ' : ' https://wien.orf.at/player/20200423/WGUM ' ,
' only_matching ' : True ,
}
class ORFBGLIE ( ORFRadioIE ) :
IE_NAME = ' orf:burgenland '
IE_DESC = ' Radio Burgenland '
_VALID_URL = r ' https?://(?P<station>burgenland) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' bgl '
_LOOP_STATION = ' oe2b '
_TEST = {
' url ' : ' https://burgenland.orf.at/player/20200423/BGM ' ,
' only_matching ' : True ,
}
class ORFOOEIE ( ORFRadioIE ) :
IE_NAME = ' orf:oberoesterreich '
IE_DESC = ' Radio Oberösterreich '
_VALID_URL = r ' https?://(?P<station>ooe) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' ooe '
_LOOP_STATION = ' oe2o '
_TEST = {
' url ' : ' https://ooe.orf.at/player/20200423/OGMO ' ,
' only_matching ' : True ,
}
class ORFSTMIE ( ORFRadioIE ) :
IE_NAME = ' orf:steiermark '
IE_DESC = ' Radio Steiermark '
_VALID_URL = r ' https?://(?P<station>steiermark) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' stm '
_LOOP_STATION = ' oe2st '
_TEST = {
def _real_extract ( self , url ) :
' url ' : ' https://steiermark.orf.at/player/20200423/STGMS ' ,
m = self . _match_valid_url ( url )
' only_matching ' : True ,
station , show_id = m . group ( ' station ' , ' id ' )
}
api_station , _ , _ = self . STATION_INFO [ station ]
if ' date ' in m . groupdict ( ) :
data = self . _download_json (
' https://audioapi.orf.at/ {0} /json/4.0/broadcast/ {1} / {2} ?_o= {3} .orf.at ' . format (
api_station , show_id , m . group ( ' date ' ) , station ) , show_id )
show_id = data [ ' id ' ]
else :
data = self . _download_json (
' https://audioapi.orf.at/ {0} /api/json/5.0/broadcast/ {1} ?_o=sound.orf.at ' . format (
api_station , show_id ) , show_id )
data = self . _get_api_payload ( data , show_id , in_payload = True )
class ORFKTNIE ( ORFRadioIE ) :
# site sends ISO8601 GMT date-times with separate TZ offset, ignored
IE_NAME = ' orf:kaernten '
# TODO: should `..._date` be calculated relative to TZ?
IE_DESC = ' Radio Kärnten '
_VALID_URL = r ' https?://(?P<station>kaernten) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' ktn '
_LOOP_STATION = ' oe2k '
_TEST = {
return merge_dicts (
' url ' : ' https://kaernten.orf.at/player/20200423/KGUMO ' ,
{ ' _type ' : ' multi_video ' } ,
' only_matching ' : True ,
self . playlist_result (
}
self . _entries ( data , station ) , show_id ,
txt_or_none ( data . get ( ' title ' ) ) ,
clean_html ( data . get ( ' subtitle ' ) ) ) )
class ORFSBGIE ( ORFRadioIE ) :
class ORFRadioCollectionIE ( ORFRadioBase ) :
IE_NAME = ' orf:salzburg '
IE_NAME = ' orf:collection '
IE_DESC = ' Radio Salzburg '
_VALID_URL = r ' https?://sound \ .orf \ .at/collection/(?P<coll_id> \ d+)(?:/(?P<item_id> \ d+))? '
_VALID_URL = r ' https?://(?P<station>salzburg) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' sbg '
_LOOP_STATION = ' oe2s '
_TEST = {
' url ' : ' https://salzburg.orf.at/player/20200423/SGUM ' ,
' only_matching ' : True ,
}
_TESTS = [ {
' url ' : ' https://sound.orf.at/collection/4/61908/was-das-uberschreiten-des-15-limits-bedeutet ' ,
' info_dict ' : {
' id ' : ' 2577582 ' ,
} ,
' playlist ' : [ {
' md5 ' : ' 5789cec7d75575ff58d19c0428c80eb3 ' ,
' info_dict ' : {
' id ' : ' 2024-06-06_1659_tl_54_7DaysThu6_153926 ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Klimakrise: Was das Überschreiten des 1,5°-Limits bedeutet ' ,
' timestamp ' : 1717686674 ,
' upload_date ' : ' 20240606 ' ,
' uploader ' : ' fm4 ' ,
} ,
} ] ,
# 'skip': 'Shows from ORF Sound are only available for 30 days.'
} , {
' url ' : ' https://sound.orf.at/collection/4/ ' ,
' info_dict ' : {
' id ' : ' 4 ' ,
} ,
' playlist_mincount ' : 10 ,
' playlist_maxcount ' : 13 ,
} ]
class ORFTIRIE ( ORFRadioIE ) :
def _real_extract ( self , url ) :
IE_NAME = ' orf:tirol '
coll_id , item_id = self . _match_valid_url ( url ) . group ( ' coll_id ' , ' item_id ' )
IE_DESC = ' Radio Tirol '
data = self . _download_json (
_VALID_URL = r ' https?://(?P<station>tirol) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
' https://collector.orf.at/api/frontend/collections/ {0} ?_o=sound.orf.at ' . format (
_API_STATION = ' tir '
coll_id ) , coll_id )
_LOOP_STATION = ' oe2t '
data = self . _get_api_payload ( data , coll_id , in_payload = True )
def yield_items ( ) :
for item in traverse_obj ( data , (
' content ' , ' items ' , lambda _ , v : any ( k in v [ ' target ' ] [ ' params ' ] for k in self . _ID_NAMES ) ) ) :
if item_id is None or item_id == txt_or_none ( item . get ( ' id ' ) ) :
target = item [ ' target ' ]
typed_item_id = self . _get_item_id ( target [ ' params ' ] )
station = target [ ' params ' ] . get ( ' station ' )
item_type = target . get ( ' type ' )
if typed_item_id and ( station or item_type ) :
yield station , typed_item_id , item_type
if item_id is not None :
break
else :
if item_id is not None :
raise ExtractorError ( ' Item not found in collection ' ,
video_id = coll_id , expected = True )
def item_playlist ( station , typed_item_id , item_type ) :
if item_type == ' upload ' :
item_data = self . _download_json ( ' https://audioapi.orf.at/radiothek/api/2.0/upload/ {0} ?_o=sound.orf.at ' . format (
typed_item_id ) , typed_item_id )
elif item_type == ' podcast-episode ' :
item_data = self . _download_json ( ' https://audioapi.orf.at/radiothek/api/2.0/episode/ {0} ?_o=sound.orf.at ' . format (
typed_item_id ) , typed_item_id )
else :
api_station , _ , _ = self . STATION_INFO [ station ]
item_data = self . _download_json (
' https://audioapi.orf.at/ {0} /api/json/5.0/ {1} / {2} ?_o=sound.orf.at ' . format (
api_station , item_type or ' broadcastitem ' , typed_item_id ) , typed_item_id )
_TEST = {
item_data = self . _get_api_payload ( item_data , typed_item_id , in_payload = True )
' url ' : ' https://tirol.orf.at/player/20200423/TGUMO ' ,
' only_matching ' : True ,
}
return merge_dicts (
{ ' _type ' : ' multi_video ' } ,
self . playlist_result (
self . _entries ( item_data , station , item_type ) , typed_item_id ,
txt_or_none ( data . get ( ' title ' ) ) ,
clean_html ( data . get ( ' subtitle ' ) ) ) )
class ORFVBGIE ( ORFRadioIE ) :
def yield_item_entries ( ) :
IE_NAME = ' orf:vorarlberg '
for station , typed_id , item_type in yield_items ( ) :
IE_DESC = ' Radio Vorarlberg '
yield item_playlist ( station , typed_id , item_type )
_VALID_URL = r ' https?://(?P<station>vorarlberg) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' vbg '
_LOOP_STATION = ' oe2v '
_TEST = {
if item_id is not None :
' url ' : ' https://vorarlberg.orf.at/player/20200423/VGUM ' ,
# coll_id = '/'.join((coll_id, item_id))
' only_matching ' : True ,
return next ( yield_item_entries ( ) )
}
return self . playlist_result ( yield_item_entries ( ) , coll_id , data . get ( ' title ' ) )
class ORFOE3IE ( ORFRadioIE ) :
IE_NAME = ' orf:oe3 '
IE_DESC = ' Radio Österreich 3 '
_VALID_URL = r ' https?://(?P<station>oe3) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
_API_STATION = ' oe3 '
_LOOP_STATION = ' oe3 '
_TEST = {
class ORFPodcastIE ( ORFRadioBase ) :
' url ' : ' https://oe3.orf.at/player/20200424/3WEK ' ,
IE_NAME = ' orf:podcast '
' only_matching ' : True ,
_STATION_RE = ' | ' . join ( map ( re . escape , ( x [ 0 ] for x in ORFRadioBase . STATION_INFO . values ( ) ) ) ) + ' |tv '
}
_VALID_URL = r ' https?://sound \ .orf \ .at/podcast/(?P<station> {0} )/(?P<show>[ \ w-]+)/(?P<id>[ \ w-]+) ' . format ( _STATION_RE )
_TESTS = [ {
' url ' : ' https://sound.orf.at/podcast/stm/der-kraeutertipp-von-christine-lackner/rotklee ' ,
' md5 ' : ' 1f2bab2ba90c2ce0c2754196ea78b35f ' ,
' info_dict ' : {
' id ' : ' der-kraeutertipp-von-christine-lackner/rotklee ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Rotklee ' ,
' description ' : ' In der Natur weit verbreitet - in der Medizin längst anerkennt: Rotklee. Dieser Podcast begleitet die Sendung " Radio Steiermark am Vormittag " , Radio Steiermark, 28. Mai 2024. ' ,
' timestamp ' : 1716891761 ,
' upload_date ' : ' 20240528 ' ,
' uploader_id ' : ' stm_kraeutertipp ' ,
' uploader ' : ' ORF Radio Steiermark ' ,
' duration ' : 101 ,
' series ' : ' Der Kräutertipp von Christine Lackner ' ,
} ,
# 'skip': 'ORF podcasts are only available for a limited time'
} ]
_ID_NAMES = ( ' slug ' , ' guid ' )
class ORFOE1IE ( ORFRadioIE ) :
def _real_extract ( self , url ) :
IE_NAME = ' orf:oe1 '
station , show , show_id = self . _match_valid_url ( url ) . group ( ' station ' , ' show ' , ' id ' )
IE_DESC = ' Radio Österreich 1 '
data = self . _download_json (
_VALID_URL = r ' https?://(?P<station>oe1) \ .orf \ .at/player/(?P<date>[0-9]+)/(?P<show> \ w+) '
' https://audioapi.orf.at/radiothek/api/2.0/podcast/ {0} / {1} / {2} ' . format (
_API_STATION = ' oe1 '
station , show , show_id ) , show_id )
_LOOP_STATION = ' oe1 '
data = self . _get_api_payload ( data , show_id , in_payload = True )
_TEST = {
return merge_dicts ( {
' url ' : ' http://oe1.orf.at/player/20170108/456544 ' ,
' id ' : ' / ' . join ( ( show , show_id ) ) ,
' md5 ' : ' 34d8a6e67ea888293741c86a099b745b ' ,
' info_dict ' : {
' id ' : ' 2017-01-08_0759_tl_51_7DaysSun6_256141 ' ,
' ext ' : ' mp3 ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Morgenjournal ' ,
' vcodec ' : ' none ' ,
' duration ' : 609 ,
} , self . _extract_podcast_upload ( data ) , rev = True )
' timestamp ' : 1483858796 ,
' upload_date ' : ' 20170108 ' ,
} ,
' skip ' : ' Shows from ORF radios are only available for 7 days. '
}
class ORFIPTVIE ( InfoExtractor ) :
class ORFIPTVIE ( InfoExtractor ) :