@ -1,36 +1,153 @@
import base64
import re
import urllib . error
import urllib . parse
import zlib
from . anvato import AnvatoIE
from . common import InfoExtractor
from . cbs import CBSIE
from . . compat import (
compat_b64decode ,
compat_urllib_parse_unquote ,
)
from . paramountplus import ParamountPlusIE
from . . utils import (
ExtractorError ,
HEADRequest ,
UserNotLive ,
determine_ext ,
float_or_none ,
format_field ,
int_or_none ,
make_archive_id ,
mimetype2ext ,
parse_duration ,
smuggle_url ,
traverse_obj ,
url_or_none ,
)
class CBSNewsBaseIE(InfoExtractor):
    """Shared helpers for the cbsnews.com extractors in this module."""

    # Locale URL slug -> station code; None means no code is known for the slug
    _LOCALES = {
        'atlanta': None,
        'baltimore': 'BAL',
        'boston': 'BOS',
        'chicago': 'CHI',
        'colorado': 'DEN',
        'detroit': 'DET',
        'losangeles': 'LA',
        'miami': 'MIA',
        'minnesota': 'MIN',
        'newyork': 'NY',
        'philadelphia': 'PHI',
        'pittsburgh': 'PIT',
        'sacramento': 'SAC',
        'sanfrancisco': 'SF',
        'texas': 'DAL',
    }
    # Regex alternation of every locale slug, for use inside _VALID_URL patterns
    _LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
    # Default Anvato access key used when a page does not supply its own
    _ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'

    def _get_item(self, webpage, display_id):
        """Return the first ``items`` entry of the page's ``CBSNEWS.defaultPayload`` JSON.

        An empty dict is returned when the payload is absent or has no dict
        at ``items[0]``.
        """
        payload = self._search_json(
            r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
            default={})
        return traverse_obj(payload, ('items', 0, {dict})) or {}

    def _get_video_url(self, item):
        """Return a URL from the item's ``video`` or ``video2`` field, else None."""
        return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)

    def _extract_playlist(self, webpage, playlist_id):
        """Build a playlist from every cbsnews.com embed iframe in *webpage*.

        Returns None (implicitly) when the page contains no such iframe.
        """
        entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
            r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
        if entries:
            return self.playlist_result(
                entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
                self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))

    def _extract_video(self, item, video_url, video_id):
        """Turn a payload *item* and its *video_url* into an info dict.

        A direct MP4 becomes a single format.  Otherwise the manifest is
        downloaded; if it references an Anvato ID the extraction is handed
        to AnvatoIE, else the manifest is parsed as HLS.
        """
        if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
            formats = [{'url': video_url, 'ext': 'mp4'}]

        else:
            manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')

            anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
            # Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
            if anvato_id:
                return self.url_result(
                    smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
                    AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])

            # Reuse the already-downloaded manifest; subtitles from it are discarded
            formats, _ = self._parse_m3u8_formats_and_subtitles(
                manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)

        def get_subtitles(subs_url):
            # Only emit a subtitles mapping when the captions value is a usable URL
            return {
                'en': [{
                    'url': subs_url,
                    'ext': 'dfxp',  # TTAF1
                }],
            } if url_or_none(subs_url) else None

        # Season/episode numbers are only taken from items flagged as full episodes
        episode_meta = traverse_obj(item, {
            'season_number': ('season', {int_or_none}),
            'episode_number': ('episode', {int_or_none}),
        }) if item.get('isFullEpisode') else {}

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(item, {
                'title': (None, ('fulltitle', 'title')),
                'description': 'dek',
                # Scale of 1000: the source value is presumably in milliseconds - TODO confirm
                'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
                'duration': ('duration', {float_or_none}),
                'subtitles': ('captions', {get_subtitles}),
                'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
                'is_live': ('type', {lambda x: x == 'live'}),
            }, get_all=False),
            **episode_meta,
        }
class CBSNewsEmbedIE ( CBSNewsBaseIE ) :
IE_NAME = ' cbsnews:embed '
_VALID_URL = r ' https?://(?:www \ .)?cbsnews \ .com/embed/video[^#]*#(?P<id>.+) '
_TESTS = [ {
' url ' : ' https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi %2F %2F vuJNkNhmHdGxgM2NL57vjd6zt % 2B8PngdN %2F yg79qeGvhzN %2F LGrS %2F % 2BuBLB531V28 % 2B % 2BO7Qg7 %2F y97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf %2F 89NM9Hi9fXifhpr8sr %2F lP848tn % 2BTdXycX25zh4cdX %2F vHl6PmmPqnWQv9w8Ed % 2B9GjYRim07bFEqdG % 2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k % 2By91mjspGsJ % 2BwVae4 % 2BsvdnaO1p73HkXs %2F VisUDTGm7R8IcdnOROeq % 2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC %2F dK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd %2F LewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf %2F hbbpzs0IB3vb8KkyzJQ % 2BywOAgCrMpgRrz % 2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu % 2BBUs9bam %2F ds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4 % 2B1u867cY5u4344n %2F xXjCqAjucdTHgLKojNKmSfO8KRsOFY %2F zKEYCKEJBzv90QA9nfm9gL % 2BHulaFqUkz9ULUYxl62B3U %2F RVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB %2F hKfwCKOzEk % 2B4k6rWd4uuT %2F wTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp %2F 3GPcXn0VXHdSS029 %2F j99UC %2F wJUvyKQ %2F zKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe %2F Q %2F BT % 2BMpf8d5GN02Ox %2F fiGsl7TA7POu1xZ5 % 2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y % 2Bix6609weFtey4ESp60WtyeWMy % 2BsmBuhsoKIyuoT % 2Bq2R %2F rW5qi3g %2F vzS2j40DoixDP8 % 2BKP0yUdpXJ4l6Vla % 2Bg9vce % 2BC4yM5YlUcbA %2F 0jLKdpmTwvsdN5z88nAIe08 %2F 0HgxeG1iv % 2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y %2F Ra3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk % 2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2 %2F kOE833sfeG %2F f5cfHtEFNXgYes0 %2F Xj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02 % 2ByXsCTk % 
2B9zhzswIN04YueNSK % 2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690 %2F 0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp %2F jwKh4wMV %2F V1yx3emLoR0MwbG4K %2F % 2BZgVep3PnzXGDHZ6a3i %2F k % 2BJrONDN13 % 2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro %2F E6JfE6d5RydbiR76k % 2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL % 2BwgboUVtUFRI %2F LemgTpdtF59ii7pab08kuPcfWzs0l %2F RI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl %2F imXI599Ev7fWjoPD78A ' ,
' only_matching ' : True ,
' info_dict ' : {
' id ' : ' 6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Cops investigate gorilla incident at Cincinnati Zoo ' ,
' description ' : ' md5:fee7441ab8aaeb3c693482394738102b ' ,
' duration ' : 350 ,
' timestamp ' : 1464719713 ,
' upload_date ' : ' 20160531 ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
} ,
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} ]
def _real_extract(self, url):
    """Decode the embed payload carried in the URL fragment and extract its video.

    The matched id is URL-unquoted, base64-decoded and inflated, then parsed
    as JSON; the video item is read from ``video.items[0]``.  When the item
    has no video URL, a ParamountPlus page for the same id is probed and
    used if it exists.
    """
    # Negative wbits tells zlib to expect a raw deflate stream (no header/trailer)
    item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
        urllib.parse.unquote(self._match_id(url))),
        -zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}

    video_id = item['mpxRefId']
    video_url = self._get_video_url(item)
    if not video_url:
        # Old embeds redirect user to ParamountPlus but most links are 404
        pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
        try:
            # HEAD request only checks that the page exists
            self._request_webpage(HEADRequest(pplus_url), video_id)
            return self.url_result(pplus_url, ParamountPlusIE)
        except ExtractorError:
            self.raise_no_formats('This video is no longer available', True, video_id)

    return self._extract_video(item, video_url, video_id)
class CBSNewsIE ( CBSNewsBaseIE ) :
IE_NAME = ' cbsnews '
IE_DESC = ' CBS News '
_VALID_URL = r ' https?://(?:www \ .)?cbsnews \ .com/(?:news|video)/(?P<id>[ \ da-z_-]+) '
_VALID_URL = r ' https?://(?:www \ .)?cbsnews \ .com/(?:news|video)/(?P<id>[ \ w -]+)'
_TESTS = [
{
@ -47,10 +164,7 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
' timestamp ' : 1476046464 ,
' upload_date ' : ' 20161009 ' ,
} ,
' params ' : {
# rtmp download
' skip_download ' : True ,
} ,
' skip ' : ' This video is no longer available ' ,
} ,
{
' url ' : ' https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/ ' ,
@ -61,48 +175,234 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
' description ' : ' md5:4a6983e480542d8b333a947bfc64ddc7 ' ,
' upload_date ' : ' 20140404 ' ,
' timestamp ' : 1396650660 ,
' uploader ' : ' CBSI-NEW ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' duration ' : 205 ,
' subtitles ' : {
' en ' : [ {
' ext ' : ' ttml ' ,
' ext ' : ' dfxp ' ,
} ] ,
} ,
} ,
' params ' : {
# m3u8 download
' skip_download ' : True ,
' skip_download ' : ' m3u8 ' ,
} ,
} ,
{
# 48 hours
' url ' : ' http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/ ' ,
' info_dict ' : {
' id ' : ' maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved ' ,
' title ' : ' Cold as Ice ' ,
' description ' : ' Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph? ' ,
} ,
' playlist_mincount ' : 7 ,
} ,
{
' url ' : ' https://www.cbsnews.com/video/032823-cbs-evening-news/ ' ,
' info_dict ' : {
' id ' : ' _2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE ' ,
' ext ' : ' mp4 ' ,
' title ' : ' CBS Evening News, March 28, 2023 ' ,
' description ' : ' md5:db20615aae54adc1d55a1fd69dc75d13 ' ,
' duration ' : 1189 ,
' timestamp ' : 1680042600 ,
' upload_date ' : ' 20230328 ' ,
' season ' : ' Season 2023 ' ,
' season_number ' : 2023 ,
' episode ' : ' Episode 83 ' ,
' episode_number ' : 83 ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
} ,
' params ' : {
' skip_download ' : ' m3u8 ' ,
} ,
} ,
]
def _real_extract(self, url):
    """Extract a cbsnews.com news/video page.

    Pages that embed cbsnews.com video iframes are returned as a playlist;
    otherwise the single item from the page payload is extracted.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    # Embedded iframes take precedence over the page's own payload item
    playlist = self._extract_playlist(webpage, display_id)
    if playlist:
        return playlist

    item = self._get_item(webpage, display_id)
    # Fall back to the URL slug when the payload carries no mpxRefId
    video_id = item.get('mpxRefId') or display_id
    video_url = self._get_video_url(item)
    if not video_url:
        self.raise_no_formats('No video content was found', expected=True, video_id=video_id)

    return self._extract_video(item, video_url, video_id)
class CBSLocalBaseIE(CBSNewsBaseIE):
    """Common extraction logic for the locale-prefixed cbsnews.com pages."""

    def _real_extract(self, url):
        """Extract a local-station page.

        Order of preference: a direct video URL in the page payload, then an
        Anvato player iframe, then any cbsnews.com embed iframes (returned as
        a playlist).
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        item = self._get_item(webpage, display_id)
        # Fall back to the URL slug when the payload carries no mpxRefId
        video_id = item.get('mpxRefId') or display_id
        video_url = self._get_video_url(item)

        if not video_url:
            anv_params = self._search_regex(
                r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
                webpage, 'Anvato URL', default=None)

            if not anv_params:
                playlist = self._extract_playlist(webpage, display_id)
                if playlist:
                    return playlist
                self.raise_no_formats('No video content was found', expected=True, video_id=video_id)

            # The key is URL-safe base64 JSON; surplus '=' padding guards against stripped padding
            anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
            anvato_id = anv_data['v']
            return self.url_result(
                smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
                    'token': anv_data.get('token') or 'default',
                }), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])

        return self._extract_video(item, video_url, video_id)
class CBSLocalIE(CBSLocalBaseIE):
    """Locale-prefixed ``/video/`` and ``/live/video/`` pages on cbsnews.com."""

    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
    _TESTS = [{
        # Anvato video via defaultPayload JSON
        'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
        'info_dict': {
            'id': '6376747',
            'ext': 'mp4',
            'title': '1st cannabis dispensary opens in Queens',
            'description': 'The dispensary is women-owned and located in Jamaica.',
            'uploader': 'CBS',
            'duration': 20,
            'timestamp': 1680193657,
            'upload_date': '20230330',
            'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
            'tags': 'count:11',
            'thumbnail': 're:^https?://.*',
            '_old_archive_ids': ['cbslocal 6376747'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # cbsnews.com video via defaultPayload JSON
        'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
        'info_dict': {
            'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
            'ext': 'mp4',
            'title': 'the city is sounding the alarm on dangerous social media challenges',
            'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
            'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
            'duration': 41.0,
            'timestamp': 1680196615,
            'upload_date': '20230330',
        },
        'params': {'skip_download': 'm3u8'},
    }]
class CBSLocalArticleIE(CBSLocalBaseIE):
    """Locale-prefixed ``/news/`` article pages on cbsnews.com."""

    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
    _TESTS = [{
        # Anvato video via iframe embed
        'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
        'playlist_count': 2,
        'info_dict': {
            'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
            'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
            'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
        },
    }, {
        'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
        },
        'skip': 'Video has been removed',
    }]
class CBSNewsLiveBaseIE(CBSNewsBaseIE):
    """Base for the cbsnews.com livestream extractors.

    Subclasses supply the edition id through ``_get_id``.
    """

    def _get_id(self, url):
        """Return the rundown edition id for *url*; a falsy value means no stream."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _real_extract(self, url):
        """Query the rundown feed for the edition and return its live HLS formats.

        Raises ExtractorError when no edition id is available and UserNotLive
        when the feed carries no usable stream URL.
        """
        video_id = self._get_id(url)
        if not video_id:
            raise ExtractorError('Livestream is not available', expected=True)

        data = traverse_obj(self._download_json(
            'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
                'partner': 'cbsnsite',
                'edition': video_id,
                'type': 'live',
            }), ('navigation', 'data', 0, {dict}))

        # Candidate fields in order: videoUrlDAI, then videoUrl.base; first valid URL wins
        video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
        if not video_url:
            raise UserNotLive(video_id=video_id)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **traverse_obj(data, {
                'title': 'headline',
                'description': 'rundown_slug',
                'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
            }),
        }
class CBSLocalLiveIE(CBSNewsLiveBaseIE):
    """Locale-prefixed ``/live/`` stream pages on cbsnews.com."""

    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.cbsnews.com/losangeles/live/',
        'info_dict': {
            'id': 'CBSN-LA',
            'ext': 'mp4',
            'title': str,
            'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
            'thumbnail': r're:^https?://.*\.jpg$',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _get_id(self, url):
        """Map the locale slug in *url* to its ``CBSN-<code>`` edition id.

        NOTE(review): for a locale whose ``_LOCALES`` value is None this
        presumably yields an empty string, which the base class reports as
        "Livestream is not available" - confirm against ``format_field``.
        """
        return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
class CBSNewsLiveIE(CBSNewsLiveBaseIE):
    """The national ``cbsnews.com/live/`` stream page."""

    IE_NAME = 'cbsnews:live'
    IE_DESC = 'CBS News Livestream'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.cbsnews.com/live/',
        'info_dict': {
            'id': 'CBSN-US',
            'ext': 'mp4',
            'title': str,
            'description': r're:\w+ \w+ CRISPIN RUNDOWN',
            'thumbnail': r're:^https?://.*\.jpg$',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _get_id(self, url):
        """Return the fixed edition id of the national stream; *url* is not inspected."""
        return 'CBSN-US'
class CBSNewsLiveVideoIE ( InfoExtractor ) :
@ -111,7 +411,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
_VALID_URL = r ' https?://(?:www \ .)?cbsnews \ .com/live/video/(?P<id>[^/?#]+) '
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
_TEST = {
_TEST S = [ {
' url ' : ' http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/ ' ,
' info_dict ' : {
' id ' : ' clinton-sanders-prepare-to-face-off-in-nh ' ,
@ -120,7 +420,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
' duration ' : 334 ,
} ,
' skip ' : ' Video gone ' ,
}
} ]
def _real_extract ( self , url ) :
display_id = self . _match_id ( url )
@ -131,13 +431,13 @@ class CBSNewsLiveVideoIE(InfoExtractor):
' dvr_slug ' : display_id ,
} )
formats = self . _extract_akamai_formats ( video_info [ ' url ' ] , display_id )
return {
' id ' : display_id ,
' display_id ' : display_id ,
' title ' : video_info [ ' headline ' ] ,
' thumbnail ' : video_info . get ( ' thumbnail_url_hd ' ) or video_info . get ( ' thumbnail_url_sd ' ) ,
' duration ' : parse_duration ( video_info . get ( ' segmentDur ' ) ) ,
' formats ' : formats ,
' formats ' : self . _extract_akamai_formats ( video_info [ ' url ' ] , display_id ) ,
* * traverse_obj ( video_info , {
' title ' : ' headline ' ,
' thumbnail ' : ( ' thumbnail_url_hd ' , { url_or_none } ) ,
' duration ' : ( ' segmentDur ' , { parse_duration } ) ,
} ) ,
}