@ -1,3 +1,5 @@
# coding: utf-8
import json
import json
import re
import re
@ -5,6 +7,7 @@ from .common import InfoExtractor
from . . utils import (
from . . utils import (
compat_urllib_parse ,
compat_urllib_parse ,
unified_strdate ,
)
)
@ -13,36 +16,69 @@ class WatIE(InfoExtractor):
# Name under which this extractor is listed.
IE_NAME = 'wat.tv'
# Download test fixture: the expected output file is named after the numeric
# id of the video, and the info_dict lists the metadata fields to compare.
_TEST = {
    u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
    u'file': u'10631273.mp4',
    u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
    u'info_dict': {
        u'title': u'World War Z - Philadelphia VOST',
        u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
    }
}
def download_video_info(self, real_id):
    """Download the metadata for a video and return its 'media' entry.

    real_id is appended to the 'contentv3' interface URL and is also used
    as the video id passed to self._download_webpage. The response body is
    parsed as JSON and the value stored under the 'media' key is returned.
    """
    # 'contentv4' is used in the website, but it also returns the related
    # videos, we don't need them
    info = self._download_webpage(
        'http://www.wat.tv/interface/contentv3/' + real_id,
        real_id, 'Downloading video info')
    info = json.loads(info)
    return info['media']
def _real_extract(self, url):
    """Extract a single video, or a playlist of its parts, from a page URL.

    The 'shortID' group of self._VALID_URL gives the short id; the page is
    then downloaded and the real id is read from its 'xtpage' value. The
    video info is fetched with self.download_video_info(real_id).

    If the id derived from the first chapter differs from the real id, the
    video is treated as multipart: a playlist result is returned whose
    entries are url results for each chapter's own URL.

    Otherwise a dict describing the single video is returned, with the
    keys 'id', 'url', 'ext', 'title', 'thumbnail', 'description' and
    'view_count', plus 'upload_date' when the first chapter carries a
    'date_diffusion' value.
    """
    def real_id_for_chapter(chapter):
        # The id is the part of 'tc_start' that precedes the first '-'.
        return chapter['tc_start'].split('-')[0]

    mobj = re.match(self._VALID_URL, url)
    short_id = mobj.group('shortID')
    webpage = self._download_webpage(url, short_id)
    real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

    video_info = self.download_video_info(real_id)
    chapters = video_info['chapters']
    first_chapter = chapters[0]

    if real_id_for_chapter(first_chapter) != real_id:
        self.to_screen('Multipart video detected')
        chapter_urls = []
        for chapter in chapters:
            chapter_id = real_id_for_chapter(chapter)
            # Yes, when this chapter is processed by WatIE,
            # it will download the info again
            chapter_info = self.download_video_info(chapter_id)
            chapter_urls.append(chapter_info['url'])
        entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
        return self.playlist_result(entries, real_id, video_info['title'])

    # Otherwise we can continue and extract just one part, we have to use
    # the short id for getting the video url
    player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
                                                 'html5': '1'})
    player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
                                         real_id, u'Downloading player info')
    player = json.loads(player_info)['player']
    html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
                                           'html5 player')
    player_webpage = self._download_webpage(html5_player, real_id,
                                            u'Downloading player webpage')

    video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
                                   'video url')
    info = {'id': real_id,
            'url': video_url,
            'ext': 'mp4',
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            }
    # The diffusion date is optional; only set upload_date when present.
    if 'date_diffusion' in first_chapter:
        info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])

    return info