@ -11,7 +11,7 @@ from ..utils import (
class CNNIE ( InfoExtractor ) :
_VALID_URL = r ''' (?x)https?://(?:(? :edition|www)\ .)?cnn \ .com/video/(?:data/.+?| \ ?) /
_VALID_URL = r ''' (?x)https?://(?:(? P<sub_domain>edition|www|money)\ .)?cnn \ .com/(?:video/(?:data/.+?| \ ?)/)?videos? /
( ? P < path > . + ? / ( ? P < title > [ ^ / ] + ? ) ( ? : \. ( ? : [ a - z \- ] + ) | ( ? = & ) ) ) '''
_TESTS = [ {
@ -45,19 +45,46 @@ class CNNIE(InfoExtractor):
' description ' : ' md5:e7223a503315c9f150acac52e76de086 ' ,
' upload_date ' : ' 20141222 ' ,
}
} , {
' url ' : ' http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html ' ,
' md5 ' : ' 52a515dc1b0f001cd82e4ceda32be9d1 ' ,
' info_dict ' : {
' id ' : ' /video/news/2016/08/19/netflix-stunning-stats.cnnmoney ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 5 stunning stats about Netflix ' ,
' description ' : ' Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn \' t know. ' ,
' upload_date ' : ' 20160819 ' ,
}
} , {
' url ' : ' http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn ' ,
' only_matching ' : True ,
} ]
# Per-sub-domain endpoints. _real_extract looks an entry up with
# self._CONFIG[sub_domain]; any sub-domain other than money or edition is
# mapped to the edition entry before the lookup.
#   data_src:  %-format template; the path captured from the page URL is
#              substituted in to build the URL of the metadata XML that is
#              downloaded.
#   media_src: prefix prepended to the stripped text of each files/file
#              element of that XML to build the media URL.
# NOTE(review): the URL comments below presumably point at the player
# configs these values were taken from -- confirm.
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
' edition ' : {
' data_src ' : ' http://edition.cnn.com/video/data/3.0/video/ %s /index.xml ' ,
' media_src ' : ' http://pmd.cdn.turner.com/cnn/big ' ,
} ,
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
' money ' : {
' data_src ' : ' http://money.cnn.com/video/data/4.0/video/ %s .xml ' ,
' media_src ' : ' http://ht3.cdn.turner.com/money/big ' ,
} ,
}
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
path = mobj . group ( ' path ' )
page_title = mobj . group ( ' title ' )
info_url = ' http://edition.cnn.com/video/data/3.0/ %s /index.xml ' % path
sub_domain , path , page_title = re . match ( self . _VALID_URL , url ) . groups ( )
if sub_domain not in ( ' money ' , ' edition ' ) :
sub_domain = ' edition '
config = self . _CONFIG [ sub_domain ]
info_url = config [ ' data_src ' ] % path
info = self . _download_xml ( info_url , page_title )
formats = [ ]
@ -66,7 +93,7 @@ class CNNIE(InfoExtractor):
( ? : _ ( ? P < bitrate > [ 0 - 9 ] + ) k ) ?
''' )
for f in info . findall ( ' files/file ' ) :
video_url = ' http://ht.cdn.turner.com/cnn/big %s ' % ( f . text . strip ( ) )
video_url = config [ ' media_src ' ] + f . text . strip ( )
fdct = {
' format_id ' : f . attrib [ ' bitrate ' ] ,
' url ' : video_url ,
@ -146,7 +173,7 @@ class CNNBlogsIE(InfoExtractor):
class CNNArticleIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:(?:edition|www) \ .)?cnn \ .com/(?!video /)'
_VALID_URL = r ' https?://(?:(?:edition|www) \ .)?cnn \ .com/(?!video s? /)'
_TEST = {
' url ' : ' http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/ ' ,
' md5 ' : ' 689034c2a3d9c6dc4aa72d65a81efd01 ' ,