@ -1,272 +1,256 @@
# -*- coding: utf-8 -*-
from . common import InfoExtractor
from . common import InfoExtractor
from . . utils import (
from . . utils import (
ExtractorError ,
int_or_none ,
int_or_none ,
orderedSet ,
orderedSet ,
parse_duration ,
parse_duration ,
parse_iso8601 ,
parse_iso8601 ,
parse_qs ,
parse_qs ,
qualities ,
qualities ,
str_or_none ,
traverse_obj ,
traverse_obj ,
unified_strdate ,
unified_strdate ,
url_or_none ,
xpath_text ,
xpath_text ,
js_to_json ,
urljoin ,
filter_dict ,
HEADRequest , # Import HEADRequest
)
)
import re
import json
import urllib . error # Import urllib.error for HEAD check exception
# --- EuropaIE (Unchanged) ---
class EuropaIE(InfoExtractor):
    """Extractor for the ec.europa.eu audiovisual services player pages.

    The extractor is flagged as not working (``_WORKING = False``); the
    implementation is kept so the URL pattern stays claimed.
    """

    _WORKING = False
    _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
        'md5': '574f080699ddd1e19a675b0ddf010371',
        'info_dict': {
            'id': 'I107758',
            'ext': 'mp4',
            'title': 'TRADE - Wikileaks on TTIP',
            'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150811',
            'duration': 34,
            'view_count': int,
            'formats': 'mincount:3',
        },
    }, {
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
        'only_matching': True,
    }, {
        'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one player URL from its playlist XML.

        The ``ref`` value of the URL is used as the video id and as the
        ``ID`` parameter of the playlist endpoint.
        """
        video_id = self._match_id(url)

        playlist = self._download_xml(
            f'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID={video_id}', video_id)

        def get_item(type_, preference):
            """Return the ``type_`` label for the first preferred language that has one.

            Returns None when no language in ``preference`` has a non-empty label.
            """
            items = {}
            for item in playlist.findall(f'./info/{type_}/item'):
                lang = xpath_text(item, 'lg', default=None)
                label = xpath_text(item, 'label', default=None)
                if lang and label:
                    items[lang] = label.strip()
            for p in preference:
                if items.get(p):
                    return items[p]
            return None

        # Language order: the URL's ``sitelang`` (default 'en'), then 'en', then 'int'.
        query = parse_qs(url)
        preferred_lang = query.get('sitelang', ('en', ))[0]
        preferred_langs = orderedSet((preferred_lang, 'en', 'int'))

        title = get_item('title', preferred_langs) or video_id
        description = get_item('description', preferred_langs)
        thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
        upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
        duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
        view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))

        # The list is reversed so the most preferred language comes last;
        # presumably `qualities` ranks later entries higher - confirm in utils.
        language_preference = qualities(preferred_langs[::-1])

        formats = []
        for file_ in playlist.findall('./files/file'):
            video_url = xpath_text(file_, './url')
            if not video_url:
                continue
            lang = xpath_text(file_, './lg')
            formats.append({
                'url': video_url,
                'format_id': lang,
                'format_note': xpath_text(file_, './lglabel'),
                'language_preference': language_preference(lang),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
# --- EuroParlWebstreamIE (Using JSON from iframe) ---
class EuroParlWebstreamIE(InfoExtractor):
    """Extractor for European Parliament webstreaming pages.

    Stream metadata is read from the ``ng-state`` JSON embedded in the
    content-manager page for the event id taken from the URL.
    """

    _VALID_URL = r'''(?x)
        https?://multimedia\.europarl\.europa\.eu/
        (?P<lang>[^/]*/)?webstreaming/(?:[^_]*_)?(?P<id>[\w-]+)
    '''
    _TESTS = [{
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'md5': '16420ad9c602663759538ac1ca16a8db',
        'info_dict': {
            'id': '20220914-0900-PLENARY',
            'ext': 'mp4',
            'title': 'Plenary session',
            'description': '',
            'duration': 45147,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'release_timestamp': 1663139069,
            'release_date': '20220914',
            'modified_timestamp': 1663650921,
            'modified_date': '20220920',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
        'md5': '8b4304f9e15a6e133100248fb55a5dce',
        'info_dict': {
            'ext': 'mp4',
            'id': '20221115-1000-SPECIAL-EUROSCOLA',
            'release_timestamp': 1668502798,
            'title': 'Euroscola',
            'release_date': '20221115',
            'live_status': 'was_live',
            'description': '',
            'duration': 9587,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'modified_timestamp': 1668945274,
            'modified_date': '20221120',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
        'md5': '0ca01cf33009d866e6f5e1cd3088c10c',
        'info_dict': {
            'id': '20230301-1130-COMMITTEE-CULT',
            'ext': 'mp4',
            'release_date': '20230301',
            'title': 'Committee on Culture and Education',
            'release_timestamp': 1677666641,
            'description': 'Committee on Culture and Education',
            'duration': 1003,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'modified_timestamp': 1732475771,
            'modified_date': '20241124',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
        'md5': 'f2e8c30935f956a7165c2f4f4b4ee090',
        'info_dict': {
            'id': '20230524-0900-COMMITTEE-ENVI',
            'ext': 'mp4',
            'release_date': '20230524',
            'title': 'Committee on Environment, Public Health and Food Safety',
            'release_timestamp': 1684912288,
            'live_status': 'was_live',
            'description': 'Committee on Environment, Public Health and Food Safety',
            'duration': 4831,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'modified_timestamp': 1732475771,
            'modified_date': '20241124',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
        'md5': '518758eb706471c4c4ef3a134034a5bd',
        'info_dict': {
            'id': '20240320-1345-SPECIAL-PRESSER',
            'ext': 'mp4',
            'release_date': '20240320',
            'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
            'release_timestamp': 1710939767,
            'description': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
            'duration': 927,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'modified_timestamp': 1732475771,
            'modified_date': '20241124',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20250328-1600-SPECIAL-PRESSER',
        'md5': 'dd1c5e67eb55e609998583d7c2966105',
        'info_dict': {
            'id': '20250328-1600-SPECIAL-PRESSER',
            'ext': 'mp4',
            'title': 'md5:04a2ab70c183dabe891a7cd190c3121d',
            'description': '',
            'duration': 1023,
            'thumbnail': 'https://storage.eup.glcloud.eu/thumbnail/default_thumbnail.png',
            'release_timestamp': 1743177199,
            'release_date': '20250328',
            'modified_timestamp': 1743180924,
            'modified_date': '20250328',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for one webstreaming page URL.

        Downloads the content-manager page for the event id, reads the
        ``contentEventKey`` object from its ``ng-state`` JSON and, when that
        object carries a ``playerUrl``, extracts HLS formats from it.
        Without a ``playerUrl`` the event is reported as upcoming and
        ``raise_no_formats`` is called.
        """
        lang, video_id = self._match_valid_url(url).group('lang', 'id')
        # The optional ``lang`` group captures its trailing slash ('en/') and
        # is None when the URL has no language segment; normalise it so the
        # query never carries 'en/' or the string 'None'.
        lang = (lang or '').rstrip('/') or None

        query = {
            'autoplay': 'true',
            'logo': 'false',
            'muted': 'false',
            'fullscreen': 'true',
            'disclaimer': 'false',
            'multicast': 'true',
            'analytics': 'false',
        }
        if lang:
            query.update({'lang': lang, 'audio': lang})

        webpage = self._download_webpage(
            f'https://control.eup.glcloud.eu/content-manager/content-page/{video_id}',
            video_id, 'Downloading iframe', query=query)
        stream_info = self._search_json(
            r'<script [^>]*id="ng-state"[^>]*>', webpage, 'stream info', video_id)['contentEventKey']
        player_url = stream_info.get('playerUrl')

        if player_url:
            live_status = 'was_live'
            # When ``finalVod`` is set the player URL is used without a time
            # window; otherwise startTime/endTime are passed as query params.
            m3u8_query = None if stream_info.get('finalVod') else traverse_obj(stream_info, {
                'startTime': ('startTime', {str_or_none}),
                'endTime': ('endTime', {str_or_none}),
            })
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                player_url, video_id, query=m3u8_query, ext='mp4')
        else:
            formats = None
            subtitles = None
            live_status = 'is_upcoming'
            self.raise_no_formats('Stream didn\'t start yet', True, video_id)

        if stream_info.get('live'):
            live_status = 'is_live'

        # Coerce both bounds before subtracting, matching how
        # ``release_timestamp`` treats ``startTime`` below.
        start_time = int_or_none(stream_info.get('startTime'))
        end_time = int_or_none(stream_info.get('endTime'))

        return {
            # Fallback id from the URL; overridden by ``commonId`` when present.
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'live_status': live_status,
            'duration': end_time - start_time if start_time and end_time else None,
            **traverse_obj(stream_info, {
                'id': ('commonId', {str_or_none}),
                'title': ('title', {str_or_none}),
                'description': ('description', {str_or_none}),
                'release_timestamp': ('startTime', {int_or_none}),
                'thumbnail': ('posterFrame', {url_or_none}),
                'modified_timestamp': ('meta', 'updatedAt', {parse_iso8601}),
            }),
        }