@ -1,21 +1,24 @@
from __future__ import unicode_literals
from __future__ import unicode_literals
from . common import InfoExtractor
from . theplatform import ThePlatformIE
from . . utils import (
from . . utils import (
sanitized_Request ,
xpath_text ,
smuggle_url ,
xpath_element ,
int_or_none ,
ExtractorError ,
find_xpath_attr ,
)
)
class CBSIE ( InfoExtractor ) :
class CBSIE ( ThePlatformIE ) :
_VALID_URL = r ' https?://(?:www \ .)?(?:cbs \ .com/shows/[^/]+/(?:video|artist)|colbertlateshow \ .com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+) '
_VALID_URL = r ' https?://(?:www \ .)?(?:cbs \ .com/shows/[^/]+/(?:video|artist)|colbertlateshow \ .com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+) '
_TESTS = [ {
_TESTS = [ {
' url ' : ' http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/ ' ,
' url ' : ' http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/ ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 4JUVEwq3wUT7 ' ,
' id ' : ' _u7W953k6la293J7EPTd9oHkSPs6Xn6_ ' ,
' display_id ' : ' connect-chat-feat-garth-brooks ' ,
' display_id ' : ' connect-chat-feat-garth-brooks ' ,
' ext ' : ' flv ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Connect Chat feat. Garth Brooks ' ,
' title ' : ' Connect Chat feat. Garth Brooks ' ,
' description ' : ' Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS! ' ,
' description ' : ' Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS! ' ,
' duration ' : 1495 ,
' duration ' : 1495 ,
@ -47,22 +50,55 @@ class CBSIE(InfoExtractor):
' url ' : ' http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/ ' ,
' url ' : ' http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/ ' ,
' only_matching ' : True ,
' only_matching ' : True ,
} ]
} ]
TP_RELEASE_URL_TEMPLATE = ' http://link.theplatform.com/s/dJ5BDC/ %s ?manifest=m3u&mbr=true '
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
    """Extract closed-caption subtitles from a SMIL document.

    Asks ``find_xpath_attr`` for a ``param`` element (namespaced through
    ``self._xpath_ns``) whose ``name`` attribute is ``ClosedCaptionURL``
    and uses that element's ``value`` attribute as the subtitle URL.

    :param smil: parsed SMIL XML element to search.
    :param namespace: optional XML namespace handed to ``self._xpath_ns``.
    :param subtitles_lang: language key for the returned mapping
        (defaults to ``'en'``).
    :return: ``{subtitles_lang: [{'ext': 'ttml', 'url': <url>}]}`` when a
        non-empty caption URL is present, otherwise an empty dict.
    """
    closed_caption_e = find_xpath_attr(
        smil, self._xpath_ns('.//param', namespace),
        'name', 'ClosedCaptionURL')
    closed_caption_url = (
        closed_caption_e.attrib.get('value')
        if closed_caption_e is not None else None)
    if not closed_caption_url:
        # Return a mapping (not a list) so both branches have the same
        # type and callers can merge the result with other subtitle dicts.
        return {}
    return {
        # Honour the caller-supplied language instead of hard-coding 'en'.
        subtitles_lang: [{
            'ext': 'ttml',
            'url': closed_caption_url,
        }],
    }
def _real_extract(self, url):
    """Extract all formats and metadata for a CBS / Colbert Late Show video.

    Steps:
      1. Download the web page and read the content id from the player
         settings embedded in it.
      2. Query the CBS video player service for the ``item`` entries
         belonging to that content id.
      3. For every item that carries a ``pid``, fetch the corresponding
         SMIL through ``self._extract_theplatform_smil`` and collect its
         formats and subtitles.
      4. Merge the result of ``self.get_metadata`` with the fields read
         from the first ``item`` element.

    :param url: page URL matched by ``_VALID_URL``.
    :return: info dict with id, display_id, title, series, season/episode
        numbers, duration, thumbnail, formats and subtitles.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)
    content_id = self._search_regex(
        [r"video\.settings\.content_id\s*=\s*'([^']+)';",
         r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
        webpage, 'content id')
    items_data = self._download_xml(
        'http://can.cbs.com/thunder/player/videoPlayerService.php',
        content_id, query={'partner': 'cbs', 'contentId': content_id})
    # Make the lookup fatal: without any <item> every later xpath_text()
    # call would be handed None and fail with an opaque AttributeError.
    video_data = xpath_element(items_data, './/item', 'item', True)
    title = xpath_text(video_data, 'videoTitle', 'title', True)

    subtitles = {}
    formats = []
    for item in items_data.findall('.//item'):
        pid = xpath_text(item, 'pid')
        if not pid:
            continue
        try:
            tp_formats, tp_subtitles = self._extract_theplatform_smil(
                self.TP_RELEASE_URL_TEMPLATE % pid, content_id,
                'Downloading %s SMIL data' % pid)
        except ExtractorError:
            # Best effort: one unavailable release must not discard the
            # formats gathered from the remaining items.
            continue
        formats.extend(tp_formats)
        subtitles = self._merge_subtitles(subtitles, tp_subtitles)
    self._sort_formats(formats)

    info = self.get_metadata(
        'dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
    # Values from the player service override the generic metadata.
    info.update({
        'id': content_id,
        'display_id': display_id,
        'title': title,
        'series': xpath_text(video_data, 'seriesTitle'),
        'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
        'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
        # int_or_none's second argument is 1000 here; presumably
        # videoLength is in milliseconds - confirm against the feed.
        'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
        'thumbnail': xpath_text(video_data, 'previewImageURL'),
        'formats': formats,
        'subtitles': subtitles,
    })
    return info