@ -1,15 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from . theplatform import ThePlatformIE
from . . utils import int_or_none
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
)
class CNETIE ( InfoExtractor ) :
class CNETIE ( ThePlatformIE ) :
_VALID_URL = r ' https?://(?:www \ .)?cnet \ .com/videos/(?P<id>[^/]+)/ '
_TESTS = [ {
' url ' : ' http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/ ' ,
@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):
' ext ' : ' flv ' ,
' title ' : ' Hands-on with Microsoft Windows 8.1 Update ' ,
' description ' : ' The new update to the Windows 8 OS brings improved performance for mouse and keyboard users. ' ,
' thumbnail ' : ' re:^http://.*/flmswindows8.jpg$ ' ,
' uploader_id ' : ' 6085384d-619e-11e3-b231-14feb5ca9861 ' ,
' uploader ' : ' Sarah Mitroff ' ,
' duration ' : 70 ,
} ,
' params ' : {
' skip_download ' : ' requires rtmpdump ' ,
}
} , {
' url ' : ' http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/ ' ,
' info_dict ' : {
' id ' : ' 56527b93-d25d-44e3-b738-f989ce2e49ba ' ,
' ext ' : ' flv ' ,
' title ' : ' Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187) ' ,
' description ' : ' Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole ' ,
' uploader_id ' : ' b163284d-6b73-44fc-b3e6-3da66c392d40 ' ,
' uploader ' : ' Ashley Esqueda ' ,
' title ' : ' Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187) ' ,
} ,
' params ' : {
' skip_download ' : True , # requires rtmpdump
' duration ' : 1482 ,
} ,
} ]
@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):
webpage = self . _download_webpage ( url , display_id )
data_json = self . _html_search_regex (
r " <div class=\" cnetVideoPlayer \" \ s+.*? data-cnet-video-options=' ([^ ' ]+) ' " ,
r " data-cnet-video(?:-uvp)? -options=' ([^ ' ]+) ' " ,
webpage , ' data json ' )
data = json . loads ( data_json )
vdata = data [ ' video ' ]
if not vdata :
vdata = data [ ' videos ' ] [ 0 ]
if not vdata :
raise ExtractorError ( ' Cannot find video data ' )
mpx_account = data [ ' config ' ] [ ' players ' ] [ ' default ' ] [ ' mpx_account ' ]
vid = vdata [ ' files ' ] . get ( ' rtmp ' , vdata [ ' files ' ] [ ' hds ' ] )
tp_link = ' http://link.theplatform.com/s/ %s / %s ' % ( mpx_account , vid )
data = self . _parse_json ( data_json , display_id )
vdata = data . get ( ' video ' ) or data [ ' videos ' ] [ 0 ]
video_id = vdata [ ' id ' ]
title = vdata . get ( ' headline ' )
if title is None :
title = vdata . get ( ' title ' )
if title is None :
raise ExtractorError ( ' Cannot find title! ' )
thumbnail = vdata . get ( ' image ' , { } ) . get ( ' path ' )
title = vdata [ ' title ' ]
author = vdata . get ( ' author ' )
if author :
uploader = ' %s %s ' % ( author [ ' firstName ' ] , author [ ' lastName ' ] )
@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):
uploader = None
uploader_id = None
mpx_account = data [ ' config ' ] [ ' uvpConfig ' ] [ ' default ' ] [ ' mpx_account ' ]
metadata = self . get_metadata ( ' %s / %s ' % ( mpx_account , list ( vdata [ ' files ' ] . values ( ) ) [ 0 ] ) , video_id )
description = vdata . get ( ' description ' ) or metadata . get ( ' description ' )
duration = int_or_none ( vdata . get ( ' duration ' ) ) or metadata . get ( ' duration ' )
formats = [ ]
subtitles = { }
for ( fkey , vid ) in vdata [ ' files ' ] . items ( ) :
if fkey == ' hls_phone ' and ' hls_tablet ' in vdata [ ' files ' ] :
continue
release_url = ' http://link.theplatform.com/s/ %s / %s ?format=SMIL&mbr=true ' % ( mpx_account , vid )
if fkey == ' hds ' :
release_url + = ' &manifest=f4m '
tp_formats , tp_subtitles = self . _extract_theplatform_smil ( release_url , video_id , ' Downloading %s SMIL data ' % fkey )
formats . extend ( tp_formats )
subtitles = self . _merge_subtitles ( subtitles , tp_subtitles )
self . _sort_formats ( formats )
return {
' _type ' : ' url_transparent ' ,
' url ' : tp_link ,
' id ' : video_id ,
' display_id ' : display_id ,
' title ' : title ,
' description ' : description ,
' thumbnail ' : metadata . get ( ' thumbnail ' ) ,
' duration ' : duration ,
' uploader ' : uploader ,
' uploader_id ' : uploader_id ,
' thumbnail ' : thumbnail ,
' subtitles ' : subtitles ,
' formats ' : formats ,
}