@ -5,15 +5,16 @@ from ..utils import (
float_or_none ,
int_or_none ,
url_or_none ,
urlencode_postdata ,
)
from . . utils . traversal import traverse_obj
PODCAST_API = ' https://api-prod.ilpost.it/podcast/v1/podcast/ %s ?hits=20 '
class IlPostIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .)?ilpost \ .it/ episodes /(?P<id>[^/?#]+)'
_VALID_URL = r ' https?://(?:www \ .)?ilpost \ .it/ podcasts/(?:.*?) /(?P<id>[^/?#]+)'
_TESTS = [ {
' url ' : ' https://www.ilpost.it/ episodes /1-avis-akvasas-ka/' ,
' url ' : ' https://www.ilpost.it/ podcasts/l-invasione /1-avis-akvasas-ka/' ,
' md5 ' : ' 43649f002d85e1c2f319bb478d479c40 ' ,
' info_dict ' : {
' id ' : ' 2972047 ' ,
@ -23,10 +24,11 @@ class IlPostIE(InfoExtractor):
' url ' : ' https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3 ' ,
' timestamp ' : 1703835014 ,
' upload_date ' : ' 20231229 ' ,
' description ' : ' <p>Circa tre miliardi di persone, oggi, parlano lingue che hanno un’ unica antenata: dall’ italiano all’ inglese, passando per il farsi e l’ islandese, queste lingue discendono tutte da una lingua arrivata in Europa circa cinquemila anni fa, insieme a un gruppo di persone ben preciso.<br /> \n Con la loro lingua queste persone si portarono dietro anche alcuni oggetti, miti e leggende, e una certa visione della società, lasciando tracce indelebili ancora oggi.</p> \n <p>Per approfondire gli argomenti trattati nel podcast abbiamo raccolto in <a href= " https://www.ilpost.it/2024/01/05/invasione-testi/?homepagePosition=3 " >questa pagina</a> le cose da leggere e da guardare dopo aver ascoltato le puntate.</p> \n ' ,
' duration ' : 2495.0 ,
' availability ' : ' public ' ,
' series_id ' : ' 235598 ' ,
' description' : ' ' ,
' thumbnail' : ' https://www.ilpost.it/wp-content/uploads/2023/12/22/1703238848-copertina500x500.jpg ' ,
} ,
} ]
@ -35,34 +37,75 @@ class IlPostIE(InfoExtractor):
webpage = self . _download_webpage ( url , display_id )
endpoint_metadata = self . _search_json (
r ' var \ s+ilpostpodcast \ s*= ' , webpage , ' metadata ' , display_id )
episode_id = endpoint_metadata [ ' post_id ' ]
podcast_id = endpoint_metadata [ ' podcast_id ' ]
podcast_metadata = self . _download_json (
endpoint_metadata [ ' ajax_url ' ] , display_id , data = urlencode_postdata ( {
' action ' : ' checkpodcast ' ,
' cookie ' : endpoint_metadata [ ' cookie ' ] ,
' post_id ' : episode_id ,
' podcast_id ' : podcast_id ,
} ) )
r ' { " props " : { " pageProps " : ' , webpage , ' metadata ' , display_id )
episode_id = endpoint_metadata [ ' data ' ] [ ' data ' ] [ ' episode ' ] [ ' data ' ] [ 0 ] [ ' id ' ]
podcast_id = traverse_obj ( endpoint_metadata , ( ' data ' , ' data ' , ' episode ' , ' data ' , 0 , ' parent ' , ' id ' ) )
podcast_metadata = traverse_obj ( endpoint_metadata , ( ' data ' , ' data ' , ' episode ' , ' data ' , 0 ) )
episode = traverse_obj ( podcast_metadata , (
' data ' , ' postcastList ' , lambda _ , v : str ( v [ ' id ' ] ) == episode_id , { dict } ) , get_all = False )
episode = podcast_metadata
if not episode :
raise ExtractorError ( ' Episode could not be extracted ' )
return {
' id ' : episode_id ,
' display_id ' : display_id ,
' series_id ' : podcast_id ,
' vcodec ' : ' none ' ,
' id ' : str ( episode_id ) ,
' display_id ' : str ( display_id ) ,
' series_id ' : str ( podcast_id ) ,
* * traverse_obj ( episode , {
' title ' : ( ' title ' , { str } ) ,
' description ' : ( ' description ' , { str } ) ,
' url ' : ( ' podcast_raw_url ' , { url_or_none } ) ,
' url ' : ( ' episode_raw_url ' , { url_or_none } ) ,
' thumbnail ' : ( ' image ' , { url_or_none } ) ,
' description ' : ( ' content_html ' , { str } ) ,
' timestamp ' : ( ' timestamp ' , { int_or_none } ) ,
' duration ' : ( ' milliseconds ' , { float_or_none ( scale = 1000 ) } ) ,
' availability ' : ( ' free ' , { lambda v : ' public ' if v else ' subscriber_only ' } ) ,
' availability ' : ( ' access_level ' , { lambda v : ' public ' if v else ' subscriber_only ' } ) ,
} ) ,
}
class IlPostPodcastIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .)?ilpost \ .it/podcasts/(?P<id>[a-zA-Z0-9 \ -]+)/?$ '
_TESTS = [ {
' url ' : ' https://www.ilpost.it/podcasts/morning/ ' ,
' info_dict ' : {
' id ' : ' morning ' ,
' display_id ' : ' morning ' ,
' title ' : ' Morning ' ,
' series ' : ' Morning ' ,
' season_number ' : 1 ,
} ,
' playlist_mincount ' : 20 ,
} , {
' url ' : ' https://www.ilpost.it/podcasts/basaglia-e-i-suoi/ ' ,
' info_dict ' : {
' id ' : ' basaglia-e-i-suoi ' ,
' display_id ' : ' basaglia-e-i-suoi ' ,
' title ' : ' Basaglia e i suoi ' ,
' series ' : ' Basaglia e i suoi ' ,
' season_number ' : 1 ,
} ,
' playlist_mincount ' : 5 ,
} ]
def _real_extract ( self , url ) :
display_id = self . _match_valid_url ( url ) . group ( ' id ' )
data = self . _download_json ( PODCAST_API % display_id , display_id )
entries = [ {
' _type ' : ' url ' ,
' url ' : episode [ ' url ' ] ,
' title ' : episode . get ( ' title ' ) ,
' description ' : episode . get ( ' content_html ' ) ,
' series ' : traverse_obj ( data , ( ' data ' , 0 , ' parent ' , ' title ' ) ) ,
' season_number ' : 1 ,
' episode_number ' : episode [ ' id ' ] ,
} for episode in traverse_obj ( data , ( ' data ' ) ) ]
return {
' _type ' : ' playlist ' ,
' id ' : display_id ,
' display_id ' : display_id ,
' title ' : traverse_obj ( data , ( ' data ' , 0 , ' parent ' , ' title ' ) ) ,
' series ' : traverse_obj ( data , ( ' data ' , 0 , ' parent ' , ' title ' ) ) ,
' entries ' : entries ,
' season_number ' : 1 ,
}