@ -4,11 +4,11 @@ from __future__ import unicode_literals
import re
import re
from . common import InfoExtractor
from . common import InfoExtractor
from . . utils import compat_urlparse
from . . compat import compat_urlparse
class SpiegelIE ( InfoExtractor ) :
class SpiegelIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .)?spiegel \ .de/video/[^/]*-(?P<id>[0-9]+)(?: \ .html)?(?:#.*)?$ '
_VALID_URL = r ' https?://(?:www \ .)?spiegel \ .de/video/[^/]*-(?P<id>[0-9]+)(?: -embed)?(?: \ .html)?(?:#.*)?$ '
_TESTS = [ {
_TESTS = [ {
' url ' : ' http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html ' ,
' url ' : ' http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html ' ,
' md5 ' : ' 2c2754212136f35fb4b19767d242f66e ' ,
' md5 ' : ' 2c2754212136f35fb4b19767d242f66e ' ,
@ -29,14 +29,24 @@ class SpiegelIE(InfoExtractor):
' description ' : ' md5:c2322b65e58f385a820c10fa03b2d088 ' ,
' description ' : ' md5:c2322b65e58f385a820c10fa03b2d088 ' ,
' duration ' : 983 ,
' duration ' : 983 ,
} ,
} ,
} , {
' url ' : ' http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html ' ,
' md5 ' : ' d8eeca6bfc8f1cd6f490eb1f44695d51 ' ,
' info_dict ' : {
' id ' : ' 1519126 ' ,
' ext ' : ' mp4 ' ,
' description ' : ' SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen. ' ,
' title ' : ' Fragen an Astronaut Alexander Gerst: " Bekommen Sie die Tageszeiten mit? " ' ,
}
} ]
} ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
title = self . _html_search_regex (
title = re . sub ( r ' \ s+ ' , ' ' , self . _html_search_regex (
r ' <div class= " module-title " >(.*?)</div> ' , webpage , ' title ' )
r ' (?s)<(?:h1|div) class= " module-title " [^>]*>(.*?)</(?:h1|div)> ' ,
webpage , ' title ' ) )
description = self . _html_search_meta ( ' description ' , webpage , ' description ' )
description = self . _html_search_meta ( ' description ' , webpage , ' description ' )
base_url = self . _search_regex (
base_url = self . _search_regex (
@ -77,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
_VALID_URL = ' https?://www \ .spiegel \ .de/(?!video/)[^?#]*?-(?P<id>[0-9]+) \ .html '
_VALID_URL = ' https?://www \ .spiegel \ .de/(?!video/)[^?#]*?-(?P<id>[0-9]+) \ .html '
IE_NAME = ' Spiegel:Article '
IE_NAME = ' Spiegel:Article '
IE_DESC = ' Articles on spiegel.de '
IE_DESC = ' Articles on spiegel.de '
_TEST = {
_TEST S = [ {
' url ' : ' http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html ' ,
' url ' : ' http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1516455 ' ,
' id ' : ' 1516455 ' ,
@ -85,19 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
' title ' : ' Faszination Badminton: Nennt es bloß nicht Federball ' ,
' title ' : ' Faszination Badminton: Nennt es bloß nicht Federball ' ,
' description ' : ' re:^Patrick Kämnitz gehört. { 100,} ' ,
' description ' : ' re:^Patrick Kämnitz gehört. { 100,} ' ,
} ,
} ,
}
} , {
' url ' : ' http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html ' ,
' info_dict ' : {
} ,
' playlist_count ' : 6 ,
} ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
# Single video on top of the page
video_link = self . _search_regex (
video_link = self . _search_regex (
r ' <a href= " ([^ " ]+) " onclick= " return spOpenVideo \ (this, ' , webpage ,
r ' <a href= " ([^ " ]+) " onclick= " return spOpenVideo \ (this, ' , webpage ,
' video page URL ' )
' video page URL ' , default = None )
video_url = compat_urlparse . urljoin (
if video_link :
self . http_scheme ( ) + ' //spiegel.de/ ' , video_link )
video_url = compat_urlparse . urljoin (
self . http_scheme ( ) + ' //spiegel.de/ ' , video_link )
return {
return self . url_result ( video_url )
' _type ' : ' url ' ,
' url ' : video_url ,
# Multiple embedded videos
}
embeds = re . findall (
r ' <div class= " vid_holder[0-9]+.*?</div> \ s*.*?url \ s*= \ s* " ([^ " ]+) " ' ,
webpage )
entries = [
self . url_result ( compat_urlparse . urljoin (
self . http_scheme ( ) + ' //spiegel.de/ ' , embed_path ) )
for embed_path in embeds
]
return self . playlist_result ( entries )