@ -42,6 +42,7 @@ from ..utils import (
unescapeHTML ,
unified_strdate ,
unsmuggle_url ,
update_url ,
update_url_query ,
url_or_none ,
urlencode_postdata ,
@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_INITIAL_PLAYER_RESPONSE_RE = r ' ytInitialPlayerResponse \ s*= \ s*( { .+?}) \ s*; '
_YT_INITIAL_BOUNDARY_RE = r ' (?:var \ s+meta|</script| \ n) '
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
    """POST a JSON request to a YouTube ``youtubei/v1`` endpoint.

    :param ep: endpoint name appended to ``https://www.youtube.com/youtubei/v1/``
    :param query: dict merged over a shallow copy of ``self._DEFAULT_API_DATA``
        to form the JSON request body (top-level keys in *query* win)
    :param video_id: identifier forwarded to ``_download_json``
    :param fatal: forwarded to ``_download_json`` unchanged
    :param headers: optional dict of extra HTTP headers; entries are applied
        on top of the default ``content-type: application/json`` header
    :return: whatever ``self._download_json`` returns for the request
    """
    # Work on a copy so the shared class-level defaults are never mutated.
    data = self._DEFAULT_API_DATA.copy()
    data.update(query)

    # Caller-supplied headers may extend or override the JSON content type.
    real_headers = {'content-type': 'application/json'}
    if headers:
        real_headers.update(headers)

    return self._download_json(
        'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
        note='Downloading API JSON', errnote='Unable to download API page',
        data=json.dumps(data).encode('utf8'), fatal=fatal,
        headers=real_headers,
        query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
def _extract_yt_initial_data ( self , video_id , webpage ) :
@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' uploader ' : ' Philipp Hagemeister ' ,
' uploader_id ' : ' phihag ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/phihag ' ,
' channel ' : ' Philipp Hagemeister ' ,
' channel_id ' : ' UCLqxVugv74EIW3VWh2NOa3Q ' ,
' channel_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCLqxVugv74EIW3VWh2NOa3Q ' ,
' upload_date ' : ' 20121002 ' ,
@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' duration ' : 10 ,
' view_count ' : int ,
' like_count ' : int ,
' dislike_count' : int ,
' thumbnail' : ' https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg ' ,
' start_time ' : 1 ,
' end_time ' : 9 ,
}
} ,
} ,
{
' url ' : ' //www.YouTube.com/watch?v=yZIXLfi8CZQ ' ,
@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' duration ' : 10 ,
' view_count ' : int ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' description ' : ' SUBSCRIBE: http://www.youtube.com/saturninefilms \r \n \r \n Even Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html ' ,
}
} ,
# Normal age-gate video (No vevo, embed allowed), available via embed page
# Age-gated videos
{
' note ' : ' Age-gated video (No vevo, embed allowed) ' ,
' url ' : ' https://youtube.com/watch?v=HtVdAasjOgU ' ,
' info_dict ' : {
' id ' : ' HtVdAasjOgU ' ,
@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' description ' : r ' re:(?s). { 100,}About the Game \ n.*?The Witcher 3: Wild Hunt. { 100,} ' ,
' duration ' : 142 ,
' uploader ' : ' The Witcher ' ,
' uploader_id ' : ' WitcherGame ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/WitcherGame ' ,
' upload_date ' : ' 20140605 ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg ' ,
' age_limit ' : 18 ,
' categories ' : [ ' Gaming ' ] ,
' tags ' : ' count:17 ' ,
' channel ' : ' The Witcher ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg ' ,
' channel_id ' : ' UCzybXLxv08IApdjdN0mJhEg ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Age-gated video with embed allowed in public site ' ,
' url ' : ' https://youtube.com/watch?v=HsUATh_Nc2U ' ,
' info_dict ' : {
' id ' : ' HsUATh_Nc2U ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Godzilla 2 (Official Video) ' ,
' description ' : ' md5:bf77e03fcae5529475e500129b05668a ' ,
' duration ' : 177 ,
' uploader ' : ' FlyingKitty ' ,
' upload_date ' : ' 20200408 ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg ' ,
' age_limit ' : 18 ,
' categories ' : [ ' Entertainment ' ] ,
' tags ' : [ ' Flyingkitty ' , ' godzilla 2 ' ] ,
' channel ' : ' FlyingKitty ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg ' ,
' channel_id ' : ' UCYQT13AtrJC0gsM1far_zJg ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
# Age-gated video only available with authentication (unavailable
# via embed page workaround)
' note ' : ' Age-gated video embedable only with clientScreen=EMBED ' ,
' url ' : ' https://youtube.com/watch?v=Tq92D6wQ1mg ' ,
' info_dict ' : {
' id ' : ' Tq92D6wQ1mg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' [MMD] Adios - EVERGLOW [+Motion DL] ' ,
' description ' : ' md5:17eccca93a786d51bc67646756894066 ' ,
' duration ' : 106 ,
' uploader ' : ' Projekt Melody ' ,
' upload_date ' : ' 20191227 ' ,
' age_limit ' : 18 ,
' thumbnail ' : ' https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg ' ,
' tags ' : [ ' mmd ' , ' dance ' , ' mikumikudance ' , ' kpop ' , ' vtuber ' ] ,
' categories ' : [ ' Entertainment ' ] ,
' channel ' : ' Projekt Melody ' ,
' channel_url ' : ' https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' channel_id ' : ' UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Non-Age-gated non-embeddable video ' ,
' url ' : ' https://youtube.com/watch?v=MeJVWBSsPAY ' ,
' info_dict ' : {
' id ' : ' MeJVWBSsPAY ' ,
' ext ' : ' mp4 ' ,
' title ' : ' OOMPH! - Such Mich Find Mich (Lyrics) ' ,
' description ' : ' Fan Video. Music & Lyrics by OOMPH!. ' ,
' duration ' : 210 ,
' uploader ' : ' Herr Lurik ' ,
' uploader_id ' : ' st3in234 ' ,
' upload_date ' : ' 20130730 ' ,
' uploader_url ' : ' http://www.youtube.com/user/st3in234 ' ,
' age_limit ' : 0 ,
' thumbnail ' : ' https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg ' ,
' tags ' : [ ' oomph ' , ' such mich find mich ' , ' lyrics ' , ' german industrial ' , ' musica industrial ' ] ,
' categories ' : [ ' Music ' ] ,
' channel ' : ' Herr Lurik ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA ' ,
' channel_id ' : ' UCdR3RSDPqub28LjZx0v9-aA ' ,
' artist ' : ' OOMPH! ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Non-bypassable age-gated video ' ,
' url ' : ' https://youtube.com/watch?v=Cr381pDsSsA ' ,
' only_matching ' : True ,
} ,
{
' note ' : ' Age-gated video only available with authentication (not via embed workaround) ' ,
' url ' : ' XgnwCQzjau8 ' ,
' only_matching ' : True ,
' skip ' : ''' This video has been removed for violating YouTube ' s Community Guidelines ''' ,
} ,
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' info_dict ' : {
' id ' : ' lqQg6PlCWgI ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Hockey - Women - GER-AUS - London 2012 Olympic Games ' ,
' description ' : r ' re:(?s)(?:.+ \ s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games \ s* ' ,
' duration ' : 6085 ,
' upload_date ' : ' 20150827 ' ,
' uploader_id ' : ' olympic ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/olympic ' ,
' description ' : ' HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games ' ,
' uploader ' : ' Olympic ' ,
' title ' : ' Hockey - Women - GER-AUS - London 2012 Olympic Games ' ,
' uploader ' : r ' re:Olympics? ' ,
' age_limit ' : 0 ,
' thumbnail ' : ' https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg ' ,
' categories ' : [ ' Sports ' ] ,
' tags ' : [ ' Hockey ' , ' 2012-07-31 ' , ' 31 July 2012 ' , ' Riverbank Arena ' , ' Session ' , ' Olympics ' , ' Olympic Games ' , ' London 2012 ' , ' 2012 Summer Olympics ' , ' Summer Games ' ] ,
' channel ' : ' Olympics ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q ' ,
' channel_id ' : ' UCTl3QQTvqHFjurroKxexy2Q ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
' params ' : {
' skip_download ' : ' requires avconv ' ,
}
} ,
# Non-square pixels
{
@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = self . _call_api (
' player ' , { ' videoId ' : video_id } , video_id )
playability_status = player_response . get ( ' playabilityStatus ' ) or { }
if playability_status . get ( ' reason ' ) == ' Sign in to confirm your age ' :
video_info = self . _download_webpage (
base_url + ' get_video_info ' , video_id ,
' Refetching age-gated info webpage ' ,
' unable to download video info webpage ' , query = {
' video_id ' : video_id ,
' eurl ' : ' https://youtube.googleapis.com/v/ ' + video_id ,
' html5 ' : 1 ,
# See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
' c ' : ' TVHTML5 ' ,
' cver ' : ' 6.20180913 ' ,
} , fatal = False )
if video_info :
pr = self . _parse_json (
try_get (
compat_parse_qs ( video_info ) ,
lambda x : x [ ' player_response ' ] [ 0 ] , compat_str ) or ' {} ' ,
video_id , fatal = False )
if pr and isinstance ( pr , dict ) :
player_response = pr
def is_agegated(playability):
    """Tell whether a ``playabilityStatus`` mapping describes an age gate.

    :param playability: the ``playabilityStatus`` value of a player response
    :return: ``None`` when *playability* is not a dict; ``True`` when it has a
        truthy ``desktopLegacyAgeGateReason`` or when its ``status`` or
        ``reason`` value contains one of the known age-gate markers;
        ``False`` otherwise
    """
    if not isinstance(playability, dict):
        return None

    if playability.get('desktopLegacyAgeGateReason'):
        return True

    # Build a real list: on Python 3 a lazy filter() object would be exhausted
    # after the first marker was tried, so the remaining markers would never
    # be compared against anything.
    candidates = (playability.get(key) for key in ('status', 'reason'))
    reasons = [value for value in candidates if value]

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason
        for reason in reasons
        for expected in AGE_GATE_REASONS)
def get_playability_status(response):
    """Return the ``playabilityStatus`` dict of *response*, or ``{}``.

    An empty dict is returned whenever ``try_get`` yields a falsy result for
    the ``playabilityStatus`` lookup restricted to ``dict`` values.
    """
    status = try_get(response, lambda x: x['playabilityStatus'], dict)
    if not status:
        return {}
    return status
playability_status = get_playability_status ( player_response )
if ( is_agegated ( playability_status )
and int_or_none ( self . _downloader . params . get ( ' age_limit ' ) , default = 18 ) > = 18 ) :
self . report_age_confirmation ( )
# Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
pb_context = { ' html5Preference ' : ' HTML5_PREF_WANTS ' }
query = {
' playbackContext ' : { ' contentPlaybackContext ' : { ' html5Preference ' : ' HTML5_PREF_WANTS ' } } ,
' contentCheckOk ' : True ,
' racyCheckOk ' : True ,
' context ' : {
' client ' : { ' clientName ' : ' TVHTML5_SIMPLY_EMBEDDED_PLAYER ' , ' clientVersion ' : ' 2.0 ' , ' hl ' : ' en ' , ' clientScreen ' : ' EMBED ' } ,
' thirdParty ' : { ' embedUrl ' : ' https://google.com ' } ,
} ,
' videoId ' : video_id ,
}
headers = {
' X-YouTube-Client-Name ' : ' 85 ' ,
' X-YouTube-Client-Version ' : ' 2.0 ' ,
' Origin ' : ' https://www.youtube.com '
}
video_info = self . _call_api ( ' player ' , query , video_id , fatal = False , headers = headers )
age_gate_status = get_playability_status ( video_info )
if age_gate_status . get ( ' status ' ) == ' OK ' :
player_response = video_info
playability_status = age_gate_status
trailer_video_id = try_get (
playability_status ,
@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for thumbnail in ( try_get (
container ,
lambda x : x [ ' thumbnail ' ] [ ' thumbnails ' ] , list ) or [ ] ) :
thumbnail_url = thumbnail. get ( ' url ' )
thumbnail_url = url_or_none( thumbnail. get ( ' url ' ) )
if not thumbnail_url :
continue
thumbnails . append ( {
' height ' : int_or_none ( thumbnail . get ( ' height ' ) ) ,
' url ' : thumbnail_url,
' url ' : update_url( thumbnail_url, query = None , fragment = None ) ,
' width ' : int_or_none ( thumbnail . get ( ' width ' ) ) ,
} )
if thumbnails :
@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sbr_tooltip = try_get (
vpir , lambda x : x [ ' sentimentBar ' ] [ ' sentimentBarRenderer ' ] [ ' tooltip ' ] )
if sbr_tooltip :
# however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
like_count , dislike_count = sbr_tooltip . split ( ' / ' )
info . update ( {
' like_count ' : str_to_int ( like_count ) ,
@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
' tags ' : list ,
' view_count ' : int ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
' categories ' : [ ' News & Politics ' ] ,
' tags ' : list ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
' categories ' : [ ' News & Politics ' ] ,
' tags ' : [ ' Cenk Uygur (TV Program Creator) ' , ' The Young Turks (Award-Winning Work) ' , ' Talk Show (TV Genre) ' ] ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _real_extract ( self , url ) :
item_id = self . _match_id ( url )
url = compat_urlparse . urlunparse (
compat_urlparse . urlparse ( url ) . _replace ( netloc = ' www.youtube.com ' ) )
url = update_url ( url , netloc = ' www.youtube.com ' )
# Handle both video/playlist URLs
qs = parse_qs ( url )
video_id = qs . get ( ' v ' , [ None ] ) [ 0 ]
@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
' categories ' : [ ' Nonprofits & Activism ' ] ,
' tags ' : list ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' noplaylist ' : True ,