@ -2,19 +2,19 @@ from __future__ import unicode_literals
from . common import InfoExtractor
from . common import InfoExtractor
from . . utils import (
from . . utils import (
parse_duration ,
parse_iso8601 ,
parse_iso8601 ,
int_or_none ,
)
)
from . . compat import compat_str
class DiscoveryIE ( InfoExtractor ) :
class DiscoveryIE ( InfoExtractor ) :
_VALID_URL = r ' http://www \ .discovery \ .com \ /[a-zA-Z0-9 \ -]*/[a-zA-Z0-9 \ -]*/videos/(?P<id>[a-zA-Z0-9_ \ -]*)(?: \ .htm)? '
_VALID_URL = r ' http://www \ .discovery \ .com \ /[a-zA-Z0-9 \ -]*/[a-zA-Z0-9 \ -]*/videos/(?P<id>[a-zA-Z0-9_ \ -]*)(?: \ .htm)? '
_TEST = {
_TEST S = [ {
' url ' : ' http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm ' ,
' url ' : ' http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm ' ,
' md5 ' : ' 3c69d77d9b0d82bfd5e5932a60f26504 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' mission-impossible-outtakes ' ,
' id ' : ' 20769 ' ,
' ext ' : ' flv ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Mission Impossible Outtakes ' ,
' title ' : ' Mission Impossible Outtakes ' ,
' description ' : ( ' Watch Jamie Hyneman and Adam Savage practice being '
' description ' : ( ' Watch Jamie Hyneman and Adam Savage practice being '
' each other -- to the point of confusing Jamie \' s dog -- and '
' each other -- to the point of confusing Jamie \' s dog -- and '
@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor):
' timestamp ' : 1303099200 ,
' timestamp ' : 1303099200 ,
' upload_date ' : ' 20110418 ' ,
' upload_date ' : ' 20110418 ' ,
} ,
} ,
}
' params ' : {
' skip_download ' : True , # requires ffmpeg
}
} , {
' url ' : ' http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons ' ,
' info_dict ' : {
' id ' : ' mythbusters-the-simpsons ' ,
' title ' : ' MythBusters: The Simpsons ' ,
} ,
' playlist_count ' : 9 ,
} ]
def _real_extract(self, url):
    """Extract every video listed for a Discovery page as a playlist.

    The page URL is requested again with ``?flat=1`` appended and the
    response is parsed as JSON. One entry is built per item of the
    ``playlist`` list found in that JSON, and all entries are returned
    together through ``playlist_result`` (so even a single-video page
    yields a playlist result).

    NOTE(review): the JSON schema (``playlist``, ``src``, ``video_length``,
    ``publishedDate``, ...) is taken from the keys read below; it is not
    otherwise visible here -- confirm against a live response.

    :param url: page URL already matched by ``_VALID_URL``.
    :return: playlist info dict whose id is the URL slug and whose title is
        ``playlist_title``, falling back to ``video_title``.
    :raises KeyError: if ``playlist`` is missing from the JSON, or an item
        lacks ``id``, ``src`` or ``title``.
    """
    video_id = self._match_id(url)
    info = self._download_json(url + '?flat=1', video_id)

    # Prefer the playlist-level title; fall back to the video-level title.
    video_title = info.get('playlist_title') or info.get('video_title')

    entries = [{
        # compat_str() so the id is always text, whatever type the JSON has.
        'id': compat_str(video_info['id']),
        'formats': self._extract_m3u8_formats(
            video_info['src'], video_id, ext='mp4',
            # idx is zero-based; show a one-based counter in the log note.
            note='Download m3u8 information for video %d' % (idx + 1)),
        'title': video_info['title'],
        'description': video_info.get('description'),
        'duration': parse_duration(video_info.get('video_length')),
        'webpage_url': video_info.get('href'),
        'thumbnail': video_info.get('thumbnailURL'),
        'alt_title': video_info.get('secondary_title'),
        'timestamp': parse_iso8601(video_info.get('publishedDate')),
    } for idx, video_info in enumerate(info['playlist'])]

    return self.playlist_result(entries, video_id, video_title)