@ -3,453 +3,309 @@ from __future__ import unicode_literals
import hashlib
import itertools
import json
import re
from . common import InfoExtractor , SearchInfoExtractor
from . . compat import (
compat_str ,
compat_urllib_parse ,
compat_urlparse ,
)
from . . utils import (
clean_html ,
determine_ext ,
ExtractorError ,
extract_attributes ,
int_or_none ,
mimetype2ext ,
parse_iso8601 ,
smuggle_url ,
try_get ,
unescapeHTML ,
url_or_none ,
)
from . brightcove import (
BrightcoveLegacyIE ,
BrightcoveNewIE ,
)
from . nbc import NBCSportsVPlayerIE
from . brightcove import BrightcoveNewIE
class YahooIE ( InfoExtractor ) :
IE_DESC = ' Yahoo screen and movies '
_VALID_URL = r ' (?P< host>https?://(?:(?P<country>[a-zA-Z]{2} ) \ .)?[ \ da-zA-Z_-]+ \ .yahoo \ .com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?: \ .html)? '
_TESTS = [
{
' url ' : ' http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html ' ,
' i nfo_dict' : {
' id ' : ' 2d25e626-2378-391f-ada0-ddaf1417e588 ' ,
' ex t' : ' mp4 ' ,
' title ' : ' Julian Smith & Travis Legg W atch Julian Smith' ,
' description ' : ' Julian and Travis watch Julian Smith ' ,
' duration ' : 6863 ,
} ,
_VALID_URL = r ' (?P< url>https?://(?:(?P<country>[a-zA-Z]{2} (?:-[a-zA-Z] {2} )?|malaysia) \ .)?(?:[ \ da-zA-Z_-]+ \ .)?yahoo \ .com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+) \ .html) '
_TESTS = [ {
' url ' : ' http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html ' ,
' info_dict ' : {
' i d' : ' 2d25e626-2378-391f-ada0-ddaf1417e588 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Julian Smith & Travis Legg Watch Julian Smith ' ,
' description ' : ' Julian and Travis w atch Julian Smith' ,
' duration ' : 6863 ,
' timestamp ' : 1369812016 ,
' upload_date ' : ' 20130529 ' ,
} ,
{
' url ' : ' http ://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html ' ,
' md5 ' : ' 251af144a19ebc4a033e8ba91ac726bb ' ,
' info_dict ' : {
' id ' : ' d1dedf8c-d58c-38c3-8963-e899929ae0a9 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Codefellas - The Cougar Lies with Spanish Moss ' ,
' description ' : ' md5: 66b627ab0a282b26352136ca96ce73c1 ' ,
' duration ' : 1 51 ,
} ,
' skip' : ' HTTP Error 404 ' ,
} , {
' url ' : ' http s://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed ' ,
' md5 ' : ' 7993e572fac98e044588d0b5260f4352 ' ,
' info_dict ' : {
' id ' : ' 4fe78544-8d48-39d8-97cd-13f205d9fcdb ' ,
' ext ' : ' mp4 ' ,
' title ' : " Yahoo Saves ' Community ' " ,
' description ' : ' md5: 4d4145af2fd3de00cbb6c1d664105053 ' ,
' duration ' : 1 70 ,
' timestamp ' : 1406838636 ,
' upload_date' : ' 20140731 ' ,
} ,
{
' url ' : ' https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed ' ,
' md5 ' : ' 7993e572fac98e044588d0b5260f4352 ' ,
' info_dict ' : {
' id ' : ' 4fe78544-8d48-39d8-97cd-13f205d9fcdb ' ,
' ext ' : ' mp4 ' ,
' title ' : " Yahoo Saves ' Community ' " ,
' description ' : ' md5:4d4145af2fd3de00cbb6c1d664105053 ' ,
' duration ' : 170 ,
}
} , {
' url ' : ' https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html ' ,
' md5 ' : ' 0b51660361f0e27c9789e7037ef76f4b ' ,
' info_dict ' : {
' id ' : ' b3affa53-2e14-3590-852b-0e0db6cd1a58 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Cute Raccoon Freed From Drain \u00a0 Using Angle Grinder ' ,
' description ' : ' md5:f66c890e1490f4910a9953c941dee944 ' ,
' duration ' : 97 ,
' timestamp ' : 1414489862 ,
' upload_date ' : ' 20141028 ' ,
}
} , {
' url ' : ' http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html ' ,
' md5 ' : ' 88e209b417f173d86186bef6e4d1f160 ' ,
' info_dict ' : {
' id ' : ' f885cf7f-43d4-3450-9fac-46ac30ece521 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' China Moses Is Crazy About the Blues ' ,
' description ' : ' md5:9900ab8cd5808175c7b3fe55b979bed0 ' ,
' duration ' : 128 ,
' timestamp ' : 1385722202 ,
' upload_date ' : ' 20131129 ' ,
}
} , {
' url ' : ' https://www.yahoo.com/movies/v/true-story-trailer-173000497.html ' ,
' md5 ' : ' 2a9752f74cb898af5d1083ea9f661b58 ' ,
' info_dict ' : {
' id ' : ' 071c4013-ce30-3a93-a5b2-e0413cd4a9d1 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' \' True Story \' Trailer ' ,
' description ' : ' True Story ' ,
' duration ' : 150 ,
' timestamp ' : 1418919206 ,
' upload_date ' : ' 20141218 ' ,
} ,
{
' url ' : ' https://tw.news.yahoo.com/ %E 6 %95% A2 %E 5 %95% 8F %E 5 % B8 %82% E9 %95% B7 %20% E9 % BB %83% E7 % A7 %80% E9 % 9C % 9C %E 6 %89% B9 %E 8 % B3 % B4 %E 6 % B8 %85% E5 % BE % B7 %20% E9 % 9D %9E %E 5 % B8 % B8 %E 9 % AB %98% E5 %82% B2-034024051.html ' ,
' md5 ' : ' 45c024bad51e63e9b6f6fad7a43a8c23 ' ,
' info_dict ' : {
' id ' : ' cac903b3-fcf4-3c14-b632-643ab541712f ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 敢問市長/黃秀霜批賴清德「非常高傲」 ' ,
' description ' : ' 直言台南沒捷運 交通居五都之末 ' ,
' duration ' : 396 ,
} ,
} , {
' url ' : ' https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html ' ,
' only_matching ' : True ,
} , {
' note ' : ' NBC Sports embeds ' ,
' url ' : ' http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313 ' ,
' info_dict ' : {
' id ' : ' 9CsDKds0kvHI ' ,
' ext ' : ' flv ' ,
' description ' : ' md5:df390f70a9ba7c95ff1daace988f0d8d ' ,
' title ' : ' Tyler Kalinoski hits buzzer-beater to lift Davidson ' ,
' upload_date ' : ' 20150313 ' ,
' uploader ' : ' NBCU-SPORTS ' ,
' timestamp ' : 1426270238 ,
} ,
{
' url ' : ' https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html ' ,
' md5 ' : ' 71298482f7c64cbb7fa064e4553ff1c1 ' ,
' info_dict ' : {
' id ' : ' b3affa53-2e14-3590-852b-0e0db6cd1a58 ' ,
' ext ' : ' webm ' ,
' title ' : ' Cute Raccoon Freed From Drain \u00a0 Using Angle Grinder ' ,
' description ' : ' md5:f66c890e1490f4910a9953c941dee944 ' ,
' duration ' : 97 ,
}
} , {
' url ' : ' https://tw.news.yahoo.com/-100120367.html ' ,
' only_matching ' : True ,
} , {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
' url ' : ' https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html ' ,
' md5 ' : ' 4fbafb9c9b6f07aa8f870629f6671b35 ' ,
' info_dict ' : {
' id ' : ' 1f32853c-a271-3eef-8cb6-f6d6872cb504 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Communitary - Community Episode 1: Ladders ' ,
' description ' : ' md5:8fc39608213295748e1e289807838c97 ' ,
' duration ' : 1646 ,
' timestamp ' : 1440436550 ,
' upload_date ' : ' 20150824 ' ,
' series ' : ' Communitary ' ,
' season_number ' : 6 ,
' episode_number ' : 1 ,
} ,
{
' url ' : ' https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html ' ,
' md5 ' : ' 57e06440778b1828a6079d2f744212c4 ' ,
' info_dict ' : {
' id ' : ' c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Program that makes hockey more affordable not offered in Manitoba ' ,
' description ' : ' md5:c54a609f4c078d92b74ffb9bf1f496f4 ' ,
' duration ' : 121 ,
} ,
' skip ' : ' Video gone ' ,
} , {
' url ' : ' https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html ' ,
' info_dict ' : {
' id ' : ' 154609075 ' ,
} ,
' playlist ' : [ {
' md5 ' : ' 000887d0dc609bc3a47c974151a40fb8 ' ,
' info_dict ' : {
' id ' : ' e624c4bc-3389-34de-9dfc-025f74943409 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' \' The Interview \' TV Spot: War ' ,
' description ' : ' The Interview ' ,
' duration ' : 30 ,
} ,
} , {
' md5 ' : ' 81bc74faf10750fe36e4542f9a184c66 ' ,
' info_dict ' : {
' id ' : ' 1fc8ada0-718e-3abe-a450-bf31f246d1a9 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' \' The Interview \' TV Spot: Guys ' ,
' description ' : ' The Interview ' ,
' duration ' : 30 ,
} ,
} ] ,
} , {
' url ' : ' http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html ' ,
' md5 ' : ' 88e209b417f173d86186bef6e4d1f160 ' ,
' info_dict ' : {
' id ' : ' f885cf7f-43d4-3450-9fac-46ac30ece521 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' China Moses Is Crazy About the Blues ' ,
' description ' : ' md5:9900ab8cd5808175c7b3fe55b979bed0 ' ,
' duration ' : 128 ,
}
} , {
' url ' : ' https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html ' ,
' md5 ' : ' d9a083ccf1379127bf25699d67e4791b ' ,
' info_dict ' : {
' id ' : ' 52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Connect the Dots: Dark Side of Virgo ' ,
' description ' : ' md5:1428185051cfd1949807ad4ff6d3686a ' ,
' duration ' : 201 ,
} ,
' skip ' : ' Domain name in.lifestyle.yahoo.com gone ' ,
} , {
' url ' : ' https://www.yahoo.com/movies/v/true-story-trailer-173000497.html ' ,
' md5 ' : ' 989396ae73d20c6f057746fb226aa215 ' ,
' info_dict ' : {
' id ' : ' 071c4013-ce30-3a93-a5b2-e0413cd4a9d1 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' \' True Story \' Trailer ' ,
' description ' : ' True Story ' ,
' duration ' : 150 ,
} ,
} , {
' url ' : ' https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html ' ,
' only_matching ' : True ,
} , {
' note ' : ' NBC Sports embeds ' ,
' url ' : ' http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313 ' ,
' info_dict ' : {
' id ' : ' 9CsDKds0kvHI ' ,
' ext ' : ' flv ' ,
' description ' : ' md5:df390f70a9ba7c95ff1daace988f0d8d ' ,
' title ' : ' Tyler Kalinoski hits buzzer-beater to lift Davidson ' ,
' upload_date ' : ' 20150313 ' ,
' uploader ' : ' NBCU-SPORTS ' ,
' timestamp ' : 1426270238 ,
}
} , {
' url ' : ' https://tw.news.yahoo.com/-100120367.html ' ,
' only_matching ' : True ,
} , {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
' url ' : ' https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html ' ,
' md5 ' : ' 4fbafb9c9b6f07aa8f870629f6671b35 ' ,
' info_dict ' : {
' id ' : ' 1f32853c-a271-3eef-8cb6-f6d6872cb504 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Communitary - Community Episode 1: Ladders ' ,
' description ' : ' md5:8fc39608213295748e1e289807838c97 ' ,
' duration ' : 1646 ,
} ,
} , {
# it uses an alias to get the video_id
' url ' : ' https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html ' ,
' info_dict ' : {
' id ' : ' 40eda9c8-8e5f-3552-8745-830f67d0c737 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Will Ferrell & Mark Wahlberg Are Pro-Spanking ' ,
' description ' : ' While they play feuding fathers in \' Daddy \' s Home, \' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood. ' ,
} ,
} , {
# ytwnews://cavideo/
' url ' : ' https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html ' ,
' info_dict ' : {
' id ' : ' ba133ff2-0793-3510-b636-59dfe9ff6cff ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 單車天使 - 中文版預 ' ,
' description ' : ' 中文版預 ' ,
' timestamp ' : 1476696196 ,
' upload_date ' : ' 20161017 ' ,
} ,
{
# config['models']['applet_model']['data']['sapi'] has no query
' url ' : ' https://www.yahoo.com/music/livenation/event/galactic-2016 ' ,
' md5 ' : ' dac0c72d502bc5facda80c9e6d5c98db ' ,
' info_dict ' : {
' id ' : ' a6015640-e9e5-3efb-bb60-05589a183919 ' ,
' ext ' : ' mp4 ' ,
' description ' : ' Galactic ' ,
' title ' : ' Dolla Diva (feat. Maggie Koerner) ' ,
} ,
' skip ' : ' redirect to https://www.yahoo.com/music ' ,
' params ' : {
' skip_download ' : True ,
} ,
{
# yahoo://article/
' url ' : ' https://www.yahoo.com/movies/video/true-story-trailer-173000497.html ' ,
' info_dict ' : {
' id ' : ' 071c4013-ce30-3a93-a5b2-e0413cd4a9d1 ' ,
' ext ' : ' mp4 ' ,
' title ' : " ' True Story ' Trailer " ,
' description ' : ' True Story ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} , {
# Contains both a Yahoo hosted video and multiple Youtube embeds
' url ' : ' https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html ' ,
' info_dict ' : {
' id ' : ' 46c5d95a-528f-3d03-b732-732fcadd51de ' ,
' title ' : ' Gwen Stefani reveals the pop hit she passed on, assigns it to her \' Voice \' contestant instead ' ,
' description ' : ' Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep. ' ,
} ,
{
# ytwnews://cavideo/
' url ' : ' https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html ' ,
' playlist ' : [ {
' info_dict ' : {
' id ' : ' ba133ff2-0793-3510-b636-59dfe9ff6cff ' ,
' id ' : ' 966d4262-4fd1-3aaa-b45b-049ca6e38ba6 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 單車天使 - 中文版預 ' ,
' description ' : ' 中文版預 ' ,
' title ' : ' Gwen Stefani reveals she turned down one of Sia \' s best songs ' ,
' description ' : ' On " The Voice " Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers. ' ,
' timestamp ' : 1572406500 ,
' upload_date ' : ' 20191030 ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
{
# custom brightcove
' url ' : ' https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/ ' ,
} , {
' info_dict ' : {
' id ' : ' 5575377707001 ' ,
' id ' : ' 352CFDOQrKg ' ,
' ext ' : ' mp4 ' ,
' title ' : " Clown entertainers say ' It ' is hurting their business " ,
' description ' : ' Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us. ' ,
' timestamp ' : 1505341164 ,
' upload_date ' : ' 20170913 ' ,
' uploader_id ' : ' 2376984109001 ' ,
} ,
' params ' : {
' skip_download ' : True ,
' title ' : ' Kyndal Inskeep " Performs the Hell Out of " Sia \' s " Elastic Heart " - The Voice Knockouts 2019 ' ,
' description ' : ' md5:35b61e94c2ae214bc965ff4245f80d11 ' ,
' uploader ' : ' The Voice ' ,
' uploader_id ' : ' NBCTheVoice ' ,
' upload_date ' : ' 20191029 ' ,
} ,
} ] ,
' params ' : {
' playlistend ' : 2 ,
} ,
{
# custom brightcove, geo-restricted to Australia, bypassable
' url ' : ' https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/ ' ,
' only_matching ' : True ,
}
]
} , {
' url ' : ' https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html ' ,
' only_matching ' : True ,
} ]
def _real_extract ( self , url ) :
# Resolve a Yahoo page URL. In statement order this tries: iframed Yahoo
# media, an NBC Sports player, Brightcove legacy/new embeds, a Brightcove
# iframe carrying data-video-id, an embedded videoplayer applet config, and
# finally a video id taken from mediaItems JSON, an alias lookup or one of
# CONTENT_ID_REGEXES, which is handed to self._get_info.
# NOTE(review): this text carries no indentation and has spaces injected
# inside string/regex literals, so block nesting below is inferred from
# statement order only - confirm against a clean copy of the file.
# Reads the named groups ' id ', ' display_id ' and ' host ' from _VALID_URL.
mobj = re . match ( self . _VALID_URL , url )
page_id = mobj . group ( ' id ' )
display_id = mobj . group ( ' display_id ' ) or page_id
host = mobj . group ( ' host ' )
webpage , urlh = self . _download_webpage_handle ( url , display_id )
# The final URL (after redirects) is checked for an error marker.
if ' err=404 ' in urlh . geturl ( ) :
raise ExtractorError ( ' Video gone ' , expected = True )
# Look for iframed media first
entries = [ ]
iframe_urls = re . findall ( r ' <iframe[^>]+src= " (/video/.+?- \ d+ \ .html \ ?format=embed.*?) " ' , webpage )
# idx is not used inside the loop.
for idx , iframe_url in enumerate ( iframe_urls ) :
entries . append ( self . url_result ( host + iframe_url , ' Yahoo ' ) )
if entries :
return self . playlist_result ( entries , page_id )
# Look for NBCSports iframes
nbc_sports_url = NBCSportsVPlayerIE . _extract_url ( webpage )
if nbc_sports_url :
return self . url_result ( nbc_sports_url , NBCSportsVPlayerIE . ie_key ( ) )
# Look for Brightcove Legacy Studio embeds
bc_url = BrightcoveLegacyIE . _extract_brightcove_url ( webpage )
if bc_url :
return self . url_result ( bc_url , BrightcoveLegacyIE . ie_key ( ) )
# Local helper: wraps a Brightcove URL with the URL's country group smuggled
# in as geo_countries and delegates to BrightcoveNewIE.
def brightcove_url_result ( bc_url ) :
return self . url_result (
smuggle_url ( bc_url , { ' geo_countries ' : [ mobj . group ( ' country ' ) ] } ) ,
BrightcoveNewIE . ie_key ( ) )
# Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE . _extract_url ( self , webpage )
if bc_url :
return brightcove_url_result ( bc_url )
brightcove_iframe = self . _search_regex (
r ' (<iframe[^>]+data-video-id=[ " \' ] \ d+[^>]+>) ' , webpage ,
' brightcove iframe ' , default = None )
if brightcove_iframe :
attr = extract_attributes ( brightcove_iframe )
src = attr . get ( ' src ' )
if src :
parsed_src = compat_urlparse . urlparse ( src )
qs = compat_urlparse . parse_qs ( parsed_src . query )
# A hard-coded account id is used when the iframe src has no accountId.
account_id = qs . get ( ' accountId ' , [ ' 2376984109001 ' ] ) [ 0 ]
brightcove_id = attr . get ( ' data-video-id ' ) or qs . get ( ' videoId ' , [ None ] ) [ 0 ]
if account_id and brightcove_id :
return brightcove_url_result (
' http://players.brightcove.net/ %s /default_default/index.html?videoId= %s '
% ( account_id , brightcove_id ) )
# The query result is often embedded in the webpage as JSON. Explicit
# requests to the video API sometimes fail with a geo-restriction reason,
# so the embedded query result is preferred whenever it is present.
config_json = self . _search_regex (
r ' window \ .Af \ .bootstrap \ [[^ \ ]]+ \ ] \ s*= \ s*( { .*? " applet_type " \ s*: \ s* " td-applet-videoplayer " .*?});(?:</script>|$) ' ,
webpage , ' videoplayer applet ' , default = None )
if config_json :
config = self . _parse_json ( config_json , display_id , fatal = False )
if config :
sapi = config . get ( ' models ' , { } ) . get ( ' applet_model ' , { } ) . get ( ' data ' , { } ) . get ( ' sapi ' )
if sapi and ' query ' in sapi :
info = self . _extract_info ( display_id , sapi , webpage )
self . _sort_formats ( info [ ' formats ' ] )
return info
items_json = self . _search_regex (
r ' mediaItems: ( { .*?})$ ' , webpage , ' items ' , flags = re . MULTILINE ,
default = None )
if items_json is None :
alias = self . _search_regex (
r ' " aliases " : { " video " : " (.*?) " ' , webpage , ' alias ' , default = None )
if alias is not None :
alias_info = self . _download_json (
' https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=[ " %s " ] ' % alias ,
display_id , ' Downloading alias info ' )
video_id = alias_info [ 0 ] [ ' id ' ]
else :
# Patterns are passed as a list to a single _search_regex call; no default
# is given for this lookup.
CONTENT_ID_REGEXES = [
r ' YUI \ .namespace \ ( " Media " \ ) \ .CONTENT_ID \ s*= \ s* " ([^ " ]+) " ' ,
r ' root \ .App \ .Cache \ .context \ .videoCache \ .curVideo = \ { " ([^ " ]+) " ' ,
r ' " first_videoid " \ s*: \ s* " ([^ " ]+) " ' ,
r ' %s [^}]* " ccm_id " \ s*: \ s* " ([^ " ]+) " ' % re . escape ( page_id ) ,
r ' <article[^>]data-uuid=[ " \' ]([^ " \' ]+) ' ,
r ' <meta[^<>]+yahoo://article/view \ ?.* \ buuid=([^& " \' ]+) ' ,
r ' <meta[^<>]+[ " \' ]ytwnews://cavideo/(?:[^/]+/)+([ \ da-fA-F-]+)[& " \' ] ' ,
]
video_id = self . _search_regex (
CONTENT_ID_REGEXES , webpage , ' content ID ' )
# NOTE(review): the next three statements require _VALID_URL to yield exactly
# three groups and rebind url and display_id; country is not read again in
# this method. They sit between the `if items_json is None` branch and its
# `else`, and look like lines from a different revision of this method -
# confirm before relying on them.
url , country , display_id = re . match ( self . _VALID_URL , url ) . groups ( )
if not country :
country = ' us '
else :
items = json . loads ( items_json )
info = items [ ' mediaItems ' ] [ ' query ' ] [ ' results ' ] [ ' mediaObj ' ] [ 0 ]
# The 'meta' field is not always in the video webpage, we request it
# from another page
video_id = info [ ' id ' ]
return self . _get_info ( video_id , display_id , webpage )
def _extract_info ( self , display_id , query , webpage ) :
# Takes the first mediaObj of a stream-API style query result and raises
# ExtractorError when that object has no ' meta ' entry (using the status
# msg as the error text when one is present).
# NOTE(review): this text carries no indentation and has spaces injected
# inside string/regex literals; block nesting is inferred from statement
# order only - confirm against a clean copy of the file.
info = query [ ' query ' ] [ ' results ' ] [ ' mediaObj ' ] [ 0 ]
meta = info . get ( ' meta ' )
video_id = info . get ( ' id ' )
if not meta :
msg = info [ ' status ' ] . get ( ' msg ' )
if msg :
raise ExtractorError (
' %s returned error: %s ' % ( self . IE_NAME , msg ) , expected = True )
raise ExtractorError ( ' Unable to extract media object meta ' )
# NOTE(review): from here on several statements read names this method
# neither receives nor assigns beforehand (country, url). They appear to
# belong to another revision's extraction flow interleaved with this
# method - confirm which statements are actually part of _extract_info.
country = country . split ( ' - ' ) [ 0 ]
api_base = ' https:// %s .yahoo.com/_td/api/resource/ ' % country
# Two lookups are tried in order; only the second one (i == 1) is fatal.
for i , uuid in enumerate ( [ ' url= ' + url , ' ymedia-alias= ' + display_id ] ) :
content = self . _download_json (
api_base + ' content;getDetailView=true;uuids=[ " %s " ] ' % uuid ,
display_id , ' Downloading content JSON metadata ' , fatal = i == 1 )
if content :
item = content [ ' items ' ] [ 0 ]
break
# Non-video items become a playlist built from the cover video (if any) and
# every body element of type videoIframe that has a url.
if item . get ( ' type ' ) != ' video ' :
entries = [ ]
cover = item . get ( ' cover ' ) or { }
if cover . get ( ' type ' ) == ' yvideo ' :
cover_url = cover . get ( ' url ' )
if cover_url :
entries . append ( self . url_result (
cover_url , ' Yahoo ' , cover . get ( ' uuid ' ) ) )
for e in item . get ( ' body ' , [ ] ) :
if e . get ( ' type ' ) == ' videoIframe ' :
iframe_url = e . get ( ' url ' )
if not iframe_url :
continue
entries . append ( self . url_result ( iframe_url ) )
return self . playlist_result (
entries , item . get ( ' uuid ' ) ,
item . get ( ' title ' ) , item . get ( ' summary ' ) )
video_id = item [ ' uuid ' ]
video = self . _download_json (
api_base + ' VideoService.videos;view=full;video_ids=[ " %s " ] ' % video_id ,
video_id , ' Downloading video JSON metadata ' ) [ 0 ]
title = video [ ' title ' ]
if country == ' malaysia ' :
country = ' my '
is_live = video . get ( ' live_state ' ) == ' live '
# Live videos request only the m3u8 format; others request web and mp4.
fmts = ( ' m3u8 ' , ) if is_live else ( ' web ' , ' mp4 ' )
urls = [ ]
formats = [ ]
# Builds one format dict per entry of info's streams list: hosts starting
# with rtmp carry the path as play_path, m3u8_playlist streams are tagged
# with the m3u8_native protocol, anything else takes its ext from the path.
for s in info [ ' streams ' ] :
tbr = int_or_none ( s . get ( ' bitrate ' ) )
format_info = {
' width ' : int_or_none ( s . get ( ' width ' ) ) ,
' height ' : int_or_none ( s . get ( ' height ' ) ) ,
' tbr ' : tbr ,
}
host = s [ ' host ' ]
path = s [ ' path ' ]
if host . startswith ( ' rtmp ' ) :
fmt = ' rtmp '
format_info . update ( {
' url ' : host ,
' play_path ' : path ,
' ext ' : ' flv ' ,
} )
else :
if s . get ( ' format ' ) == ' m3u8_playlist ' :
fmt = ' hls '
format_info . update ( {
' protocol ' : ' m3u8_native ' ,
' ext ' : ' mp4 ' ,
} )
else :
fmt = format_info [ ' ext ' ] = determine_ext ( path )
format_url = compat_urlparse . urljoin ( host , path )
format_info [ ' url ' ] = format_url
format_info [ ' format_id ' ] = fmt + ( ' - %d ' % tbr if tbr else ' ' )
formats . append ( format_info )
# Closed captions are read from a JSON array embedded in the webpage; an
# empty array literal is the default when none is found.
closed_captions = self . _html_search_regex (
r ' " closedcaptions " :( \ [[^ \ ]]+ \ ]) ' , webpage , ' closed captions ' ,
default = ' [] ' )
cc_json = self . _parse_json ( closed_captions , video_id , fatal = False )
subtitles = { }
if cc_json :
for closed_caption in cc_json :
lang = closed_caption [ ' lang ' ]
if lang not in subtitles :
subtitles [ lang ] = [ ]
subtitles [ lang ] . append ( {
' url ' : closed_caption [ ' url ' ] ,
' ext ' : mimetype2ext ( closed_caption [ ' content_type ' ] ) ,
# NOTE(review): the `( {` opened three lines above is never closed before
# the `for` statement below - another sign of interleaved revisions.
for fmt in fmts :
media_obj = self . _download_json (
' https://video-api.yql.yahoo.com/v1/video/sapi/streams/ ' + video_id ,
video_id , ' Downloading %s JSON metadata ' % fmt ,
headers = self . geo_verification_headers ( ) , query = {
' format ' : fmt ,
' region ' : country . upper ( ) ,
} ) [ ' query ' ] [ ' results ' ] [ ' mediaObj ' ] [ 0 ]
# msg is read again after the loop for the geo-restriction check.
msg = media_obj . get ( ' status ' , { } ) . get ( ' msg ' )
for s in media_obj . get ( ' streams ' , [ ] ) :
host = s . get ( ' host ' )
path = s . get ( ' path ' )
if not host or not path :
continue
s_url = host + path
if s . get ( ' format ' ) == ' m3u8 ' :
formats . extend ( self . _extract_m3u8_formats (
s_url , video_id , ' mp4 ' , m3u8_id = ' hls ' , fatal = False ) )
continue
tbr = int_or_none ( s . get ( ' bitrate ' ) )
formats . append ( {
' url ' : s_url ,
' format_id ' : fmt + ( ' - %d ' % tbr if tbr else ' ' ) ,
' width ' : int_or_none ( s . get ( ' width ' ) ) ,
' height ' : int_or_none ( s . get ( ' height ' ) ) ,
' tbr ' : tbr ,
' fps ' : int_or_none ( s . get ( ' framerate ' ) ) ,
} )
# urls remembers caption URLs already added so each one is listed once.
for cc in media_obj . get ( ' closedcaptions ' , [ ] ) :
cc_url = cc . get ( ' url ' )
if not cc_url or cc_url in urls :
continue
urls . append ( cc_url )
subtitles . setdefault ( cc . get ( ' lang ' ) or ' en-US ' , [ ] ) . append ( {
' url ' : cc_url ,
' ext ' : mimetype2ext ( cc . get ( ' content_type ' ) ) ,
} )
streaming_url = video . get ( ' streaming_url ' )
if streaming_url and not is_live :
formats . extend ( self . _extract_m3u8_formats (
streaming_url , video_id , ' mp4 ' ,
' m3u8_native ' , m3u8_id = ' hls ' , fatal = False ) )
if not formats and msg == ' geo restricted ' :
self . raise_geo_restricted ( )
self . _sort_formats ( formats )
# Thumbnails without a url are skipped.
thumbnails = [ ]
for thumb in video . get ( ' thumbnails ' , [ ] ) :
thumb_url = thumb . get ( ' url ' )
if not thumb_url :
continue
thumbnails . append ( {
' id ' : thumb . get ( ' tag ' ) ,
' url ' : thumb . get ( ' url ' ) ,
' width ' : int_or_none ( thumb . get ( ' width ' ) ) ,
' height ' : int_or_none ( thumb . get ( ' height ' ) ) ,
} )
series_info = video . get ( ' series_info ' ) or { }
# NOTE(review): the title, display_id, description and duration keys each
# appear twice in the literal below (once built from meta, once from
# video/title); in a Python dict literal the later entry wins. Confirm
# which set of entries is intended.
return {
' id ' : video_id ,
' display_id ' : display_id ,
' title ' : unescapeHTML ( meta [ ' title ' ] ) ,
' title ' : self . _live_title ( title ) if is_live else title ,
' formats ' : formats ,
' description ' : clean_html ( meta [ ' description ' ] ) ,
' thumbnail ' : meta [ ' thumbnail ' ] if meta . get ( ' thumbnail ' ) else self . _og_search_thumbnail ( webpage ) ,
' duration ' : int_or_none ( meta . get ( ' duration ' ) ) ,
' display_id ' : display_id ,
' thumbnails ' : thumbnails ,
' description ' : clean_html ( video . get ( ' description ' ) ) ,
' timestamp ' : parse_iso8601 ( video . get ( ' publish_time ' ) ) ,
' subtitles ' : subtitles ,
' duration ' : int_or_none ( video . get ( ' duration ' ) ) ,
' view_count ' : int_or_none ( video . get ( ' view_count ' ) ) ,
' is_live ' : is_live ,
' series ' : video . get ( ' show_name ' ) ,
' season_number ' : int_or_none ( series_info . get ( ' season_number ' ) ) ,
' episode_number ' : int_or_none ( series_info . get ( ' episode_number ' ) ) ,
}
def _get_info(self, video_id, display_id, webpage):
    """Collect every available format for ``video_id`` into one info dict.

    The stream API is queried once per container format (webm, then mp4),
    each response is turned into an info dict by ``self._extract_info``,
    and the per-format lists are merged together with the HLS variants.

    :param video_id: id of the video, appended to the stream API URL.
    :param display_id: id passed to the download helper and to
        ``_extract_info`` for each request.
    :param webpage: page HTML; searched for the region and forwarded to
        ``_extract_info``.
    :returns: the info dict produced for the last queried format (mp4),
        with its ``formats`` entry replaced by the merged, sorted list.
    """
    # The region may be embedded JSON-escaped (\"region\":\"US\"), hence the
    # optional backslash in front of every quote. Defaults to US.
    region = self._search_regex(
        r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
        webpage, 'region', fatal=False, default='US').upper()

    formats = []
    info = {}
    for fmt in ('webm', 'mp4'):
        query_result = self._download_json(
            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
            display_id, 'Downloading %s video info' % fmt, query={
                'protocol': 'http',
                'region': region,
                'format': fmt,
            })
        # Metadata of the last iteration is what gets returned; formats of
        # every iteration are accumulated.
        info = self._extract_info(display_id, query_result, webpage)
        formats.extend(info['formats'])

    # HLS extraction is best effort (fatal=False).
    formats.extend(self._extract_m3u8_formats(
        'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
    self._sort_formats(formats)
    info['formats'] = formats
    return info
class YahooSearchIE ( SearchInfoExtractor ) :
IE_DESC = ' Yahoo screen search '