@ -11,9 +11,9 @@ from .nexx import (
from . spiegeltv import SpiegeltvIE
from . . compat import compat_urlparse
from . . utils import (
extract_attributes ,
unified_strdat e,
get_element_by_attribute ,
parse_duration ,
strip_or_non e,
unified_timestamp ,
)
@ -21,35 +21,38 @@ class SpiegelIE(InfoExtractor):
# URL pattern for spiegel.de video pages: http(s), optional "www.", a
# "/video/" path whose slug ends in "-<digits>" (captured as the named
# group `id`), then an optional "-embed" or "-iframe" suffix, an optional
# ".html" extension and an optional "#fragment"; anchored at the end.
_VALID_URL = r ' https?://(?:www \ .)?spiegel \ .de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?: \ .html)?(?:#.*)?$ '
_TESTS = [ {
' url ' : ' http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html ' ,
' md5 ' : ' 2c2754212136f35fb4b19767d242f66e ' ,
' md5 ' : ' b57399839d055fccfeb9a0455c439868 ' ,
' info_dict ' : {
' id ' : ' 1259285 ' ,
' id ' : ' 563747 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Vulkanausbruch in Ecuador: Der " Feuerschlund " ist wieder aktiv ' ,
' description ' : ' md5:8029d8310232196eb235d27575a8b9f4 ' ,
' duration ' : 49 ,
' upload_date ' : ' 20130311 ' ,
' timestamp ' : 1362994320 ,
} ,
} , {
' url ' : ' http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html ' ,
' md5 ' : ' f2cdf638d7aa47654e251e1aee360af1 ' ,
' md5 ' : ' 5b6c2f4add9d62912ed5fc78a1faed80 ' ,
' info_dict ' : {
' id ' : ' 1309159 ' ,
' id ' : ' 580988 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers ' ,
' description ' : ' md5:c2322b65e58f385a820c10fa03b2d088 ' ,
' duration ' : 983 ,
' upload_date ' : ' 20131115 ' ,
' timestamp ' : 1384546642 ,
} ,
} , {
' url ' : ' http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html ' ,
' md5 ' : ' d8eeca6bfc8f1cd6f490eb1f44695d51 ' ,
' md5 ' : ' 97b91083a672d72976faa8433430afb9 ' ,
' info_dict ' : {
' id ' : ' 1519126 ' ,
' id ' : ' 601883 ' ,
' ext ' : ' mp4 ' ,
' description ' : ' SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen. ' ,
' title ' : ' Fragen an Astronaut Alexander Gerst: " Bekommen Sie die Tageszeiten mit? " ' ,
' upload_date ' : ' 20140904 ' ,
' timestamp ' : 1409834160 ,
}
} , {
' url ' : ' http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html ' ,
@ -62,59 +65,28 @@ class SpiegelIE(InfoExtractor):
def _real_extract(self, url):
    """Resolve a spiegel.de video page to its Nexx-hosted media.

    The video's metadata JSON is requested instead of the HTML page.  If
    that request ends up on a URL handled by SpiegeltvIE, extraction is
    delegated there.  Otherwise the JSON is parsed and a
    ``url_transparent`` result pointing at the ``nexx:`` URL is returned,
    carrying the title, description, duration and timestamp read from the
    metadata.

    ``title`` and ``nexxOmniaId`` are looked up by subscript, so metadata
    lacking either key raises ``KeyError``; all other fields are optional.
    """
    video_id = self._match_id(url)
    metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
    handle = self._request_webpage(metadata_url, video_id)

    # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
    final_url = handle.geturl()
    if SpiegeltvIE.suitable(final_url):
        return self.url_result(final_url, 'Spiegeltv')

    # Read the body from the handle that is already open rather than
    # issuing a second request for the same metadata document.
    video_data = self._parse_json(
        self._webpage_read_content(handle, metadata_url, video_id),
        video_id)

    # Mandatory fields: fail loudly if the metadata does not carry them.
    title = video_data['title']
    nexx_id = video_data['nexxOmniaId']
    # Fall back to domain '748' when the metadata names no Nexx domain.
    domain_id = video_data.get('nexxOmniaDomain') or '748'

    return {
        '_type': 'url_transparent',
        'id': video_id,
        'url': 'nexx:%s:%s' % (domain_id, nexx_id),
        'title': title,
        'description': strip_or_none(video_data.get('teaser')),
        'duration': parse_duration(video_data.get('duration')),
        'timestamp': unified_timestamp(video_data.get('datum')),
        'ie_key': NexxIE.ie_key(),
    }