@ -279,6 +279,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return super ( YoutubeBaseInfoExtractor , self ) . _download_webpage_handle (
return super ( YoutubeBaseInfoExtractor , self ) . _download_webpage_handle (
* args , * * compat_kwargs ( kwargs ) )
* args , * * compat_kwargs ( kwargs ) )
def _get_yt_initial_data(self, video_id, webpage):
    """Extract and decode the ``ytInitialData`` JSON blob embedded in a page.

    Looks in ``webpage`` for either the ``window["ytInitialData"] = ...;``
    or the ``var ytInitialData = ...;`` assignment, passes the captured
    text through ``uppercase_escape`` and hands it to ``self._parse_json``
    with ``fatal=False``.

    video_id is only forwarded to ``self._parse_json``.

    Returns whatever ``self._parse_json`` returns, or None when neither
    assignment is found in the page.
    """
    # The lookbehind stops the lazy capture at the first '}' that is
    # immediately followed by ';', so the trailing ';' is not captured.
    patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_json = self._search_regex(
        patterns, webpage, 'ytInitialData', default=None)
    if not raw_json:
        return None
    return self._parse_json(
        uppercase_escape(raw_json), video_id, fatal=False)
def _real_initialize ( self ) :
def _real_initialize ( self ) :
if self . _downloader is None :
if self . _downloader is None :
return
return
@ -1390,6 +1399,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# https://github.com/ytdl-org/youtube-dl/pull/7599)
# https://github.com/ytdl-org/youtube-dl/pull/7599)
r ' ;ytplayer \ .config \ s*= \ s*( { .+?});ytplayer ' ,
r ' ;ytplayer \ .config \ s*= \ s*( { .+?});ytplayer ' ,
r ' ;ytplayer \ .config \ s*= \ s*( { .+?}); ' ,
r ' ;ytplayer \ .config \ s*= \ s*( { .+?}); ' ,
r ' ytInitialPlayerResponse \ s*= \ s*( { .+?});var meta '
)
)
config = self . _search_regex (
config = self . _search_regex (
patterns , webpage , ' ytplayer.config ' , default = None )
patterns , webpage , ' ytplayer.config ' , default = None )
@ -1397,15 +1407,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self . _parse_json (
return self . _parse_json (
uppercase_escape ( config ) , video_id , fatal = False )
uppercase_escape ( config ) , video_id , fatal = False )
def _get_yt_initial_data(self, video_id, webpage):
    """Extract and decode the ``ytInitialData`` JSON blob embedded in a page.

    Looks in ``webpage`` for either the ``window["ytInitialData"] = ...;``
    or the ``var ytInitialData = ...;`` assignment, passes the captured
    text through ``uppercase_escape`` and hands it to ``self._parse_json``
    with ``fatal=False``.

    video_id is only forwarded to ``self._parse_json``.

    Returns whatever ``self._parse_json`` returns, or None when neither
    assignment is found in the page.
    """
    # The lookbehind stops the lazy capture at the first '}' that is
    # immediately followed by ';', so the trailing ';' is not captured.
    patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_json = self._search_regex(
        patterns, webpage, 'ytInitialData', default=None)
    if not raw_json:
        return None
    return self._parse_json(
        uppercase_escape(raw_json), video_id, fatal=False)
def _get_music_metadata_from_yt_initial ( self , yt_initial ) :
def _get_music_metadata_from_yt_initial ( self , yt_initial ) :
music_metadata = [ ]
music_metadata = [ ]
key_map = {
key_map = {
@ -1454,10 +1455,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self . _downloader . report_warning ( err_msg )
self . _downloader . report_warning ( err_msg )
return { }
return { }
try :
try :
if " args " in player_config and " ttsurl " in player_config [ " args " ] :
args = player_config [ ' args ' ]
args = player_config [ ' args ' ]
caption_url = args . get ( ' ttsurl ' )
caption_url = args [ ' ttsurl ' ]
if caption_url :
timestamp = args [ ' timestamp ' ]
timestamp = args [ ' timestamp ' ]
# We get the available subtitles
# We get the available subtitles
list_params = compat_urllib_parse_urlencode ( {
list_params = compat_urllib_parse_urlencode ( {
' type ' : ' list ' ,
' type ' : ' list ' ,
@ -1513,11 +1515,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return captions
return captions
# New captions format as of 22.06.2017
# New captions format as of 22.06.2017
player_response = args . get ( ' player_response ' )
if " args " in player_config :
if player_response and isinstance ( player_response , compat_str ) :
player_response = player_config [ " args " ] . get ( ' player_response ' )
else :
# New player system (ytInitialPlayerResponse) as of October 2020
player_response = player_config
if player_response :
if isinstance ( player_response , compat_str ) :
player_response = self . _parse_json (
player_response = self . _parse_json (
player_response , video_id , fatal = False )
player_response , video_id , fatal = False )
if player_response :
renderer = player_response [ ' captions ' ] [ ' playerCaptionsTracklistRenderer ' ]
renderer = player_response [ ' captions ' ] [ ' playerCaptionsTracklistRenderer ' ]
caption_tracks = renderer [ ' captionTracks ' ]
caption_tracks = renderer [ ' captionTracks ' ]
for caption_track in caption_tracks :
for caption_track in caption_tracks :
@ -1534,6 +1542,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self . _downloader . report_warning ( " Couldn ' t find automatic captions for %s " % video_id )
self . _downloader . report_warning ( " Couldn ' t find automatic captions for %s " % video_id )
return { }
return { }
if " args " in player_config :
args = player_config [ " args " ]
# Some videos don't provide ttsurl but rather caption_tracks and
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
# caption_translation_languages (e.g. 20LmZk1hakA)
# Not used anymore as of 22.06.2017
# Not used anymore as of 22.06.2017
@ -1822,7 +1834,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Try looking directly into the video webpage
# Try looking directly into the video webpage
ytplayer_config = self . _get_ytplayer_config ( video_id , video_webpage )
ytplayer_config = self . _get_ytplayer_config ( video_id , video_webpage )
if ytplayer_config :
if ytplayer_config :
args = ytplayer_config [ ' args ' ]
args = ytplayer_config . get ( " args " )
if args is not None :
if args . get ( ' url_encoded_fmt_stream_map ' ) or args . get ( ' hlsvp ' ) :
if args . get ( ' url_encoded_fmt_stream_map ' ) or args . get ( ' hlsvp ' ) :
# Convert to the same format returned by compat_parse_qs
# Convert to the same format returned by compat_parse_qs
video_info = dict ( ( k , [ v ] ) for k , v in args . items ( ) )
video_info = dict ( ( k , [ v ] ) for k , v in args . items ( ) )
@ -1837,6 +1850,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True
is_live = True
if not player_response :
if not player_response :
player_response = extract_player_response ( args . get ( ' player_response ' ) , video_id )
player_response = extract_player_response ( args . get ( ' player_response ' ) , video_id )
elif not player_response :
player_response = ytplayer_config
if not video_info or self . _downloader . params . get ( ' youtube_include_dash_manifest ' , True ) :
if not video_info or self . _downloader . params . get ( ' youtube_include_dash_manifest ' , True ) :
add_dash_mpd_pr ( player_response )
add_dash_mpd_pr ( player_response )
else :
else :
@ -1866,8 +1881,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False
age_gate = False
# Try looking directly into the video webpage
# Try looking directly into the video webpage
ytplayer_config = self . _get_ytplayer_config ( video_id , video_webpage )
ytplayer_config = self . _get_ytplayer_config ( video_id , video_webpage )
if ytplayer_config :
args = ytplayer_config . get ( " args " )
args = ytplayer_config [ ' args ' ]
if args is not None :
if args . get ( ' url_encoded_fmt_stream_map ' ) or args . get ( ' hlsvp ' ) :
if args . get ( ' url_encoded_fmt_stream_map ' ) or args . get ( ' hlsvp ' ) :
# Convert to the same format returned by compat_parse_qs
# Convert to the same format returned by compat_parse_qs
video_info = dict ( ( k , [ v ] ) for k , v in args . items ( ) )
video_info = dict ( ( k , [ v ] ) for k , v in args . items ( ) )
@ -1882,6 +1897,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True
is_live = True
if not player_response :
if not player_response :
player_response = extract_player_response ( args . get ( ' player_response ' ) , video_id )
player_response = extract_player_response ( args . get ( ' player_response ' ) , video_id )
elif not player_response :
player_response = ytplayer_config
if not video_info or self . _downloader . params . get ( ' youtube_include_dash_manifest ' , True ) :
if not video_info or self . _downloader . params . get ( ' youtube_include_dash_manifest ' , True ) :
add_dash_mpd_pr ( player_response )
add_dash_mpd_pr ( player_response )
@ -2614,6 +2631,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE_TPL = r ' href= " \ s*/watch \ ?v= %s (?:&(?:[^ " ]*?index=(?P<index> \ d+))?(?:[^>]+>(?P<title>[^<]+))?)? '
_VIDEO_RE_TPL = r ' href= " \ s*/watch \ ?v= %s (?:&(?:[^ " ]*?index=(?P<index> \ d+))?(?:[^>]+>(?P<title>[^<]+))?)? '
_VIDEO_RE = _VIDEO_RE_TPL % r ' (?P<id>[0-9A-Za-z_-] {11} ) '
_VIDEO_RE = _VIDEO_RE_TPL % r ' (?P<id>[0-9A-Za-z_-] {11} ) '
IE_NAME = ' youtube:playlist '
IE_NAME = ' youtube:playlist '
# Playlist ids starting with this prefix are handled as Youtube Music
# playlists: they are kept out of the mix extraction path and have the
# channel info below merged into their playlist result.
_YTM_PLAYLIST_PREFIX = ' RDCLAK5uy_ '
# Fixed uploader fields applied to playlists whose id matches the prefix above.
_YTM_CHANNEL_INFO = {
' uploader ' : ' Youtube Music ' ,
' uploader_id ' : ' music ' , # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
' uploader_url ' : ' https://www.youtube.com/music '
}
_TESTS = [ {
_TESTS = [ {
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' info_dict ' : {
' info_dict ' : {
@ -2811,10 +2834,21 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return zip ( ids_in_page , titles_in_page )
return zip ( ids_in_page , titles_in_page )
def _extract_mix_ids_from_yt_initial(self, yt_initial):
    """Collect the video ids listed in a mix's embedded initial data.

    yt_initial is the decoded ``ytInitialData`` object. The playlist
    entries are read from
    ``contents.twoColumnWatchNextResults.playlist.playlist.contents`` and
    each entry's ``playlistPanelVideoRenderer.videoId`` is taken.

    Returns a list of video id strings in page order; the list is empty
    when the playlist path is absent or no entry carries a video id.
    """
    # NOTE(review): relies on try_get yielding a falsy value when the path
    # is missing or is not a list - confirm against the utils helper.
    playlist_contents = try_get(
        yt_initial,
        lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'],
        list) or []
    ids = []
    for item in playlist_contents:
        video_id = try_get(
            item, lambda x: x['playlistPanelVideoRenderer']['videoId'],
            compat_str)
        # Entries without a usable id are skipped.
        if video_id:
            ids.append(video_id)
    return ids
def _extract_mix ( self , playlist_id ) :
def _extract_mix ( self , playlist_id ) :
# The mixes are generated from a single video
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
# the id of the playlist is just 'RD' + video_id
ids = [ ]
ids = [ ]
yt_initial = None
last_id = playlist_id [ - 11 : ]
last_id = playlist_id [ - 11 : ]
for n in itertools . count ( 1 ) :
for n in itertools . count ( 1 ) :
url = ' https://www.youtube.com/watch?v= %s &list= %s ' % ( last_id , playlist_id )
url = ' https://www.youtube.com/watch?v= %s &list= %s ' % ( last_id , playlist_id )
@ -2824,6 +2858,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r ''' (?xs)data-video-username= " .*? " .*?
r ''' (?xs)data-video-username= " .*? " .*?
href = " /watch \ ?v=([0-9A-Za-z_-] {11} )&[^ " ] * ? list = % s ''' % r e.escape(playlist_id),
href = " /watch \ ?v=([0-9A-Za-z_-] {11} )&[^ " ] * ? list = % s ''' % r e.escape(playlist_id),
webpage ) )
webpage ) )
# if no ids in html of page, try using embedded json
if ( len ( new_ids ) == 0 ) :
yt_initial = self . _get_yt_initial_data ( playlist_id , webpage )
if yt_initial :
new_ids = self . _extract_mix_ids_from_yt_initial ( yt_initial )
# Fetch new pages until all the videos are repeated, it seems that
# Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos.
# there are always 51 unique videos.
new_ids = [ _id for _id in new_ids if _id not in ids ]
new_ids = [ _id for _id in new_ids if _id not in ids ]
@ -2841,6 +2882,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
or search_title ( ' title ' ) )
or search_title ( ' title ' ) )
title = clean_html ( title_span )
title = clean_html ( title_span )
if not title :
title = try_get ( yt_initial , lambda x : x [ ' contents ' ] [ ' twoColumnWatchNextResults ' ] [ ' playlist ' ] [ ' playlist ' ] [ ' title ' ] , compat_str )
return self . playlist_result ( url_results , playlist_id , title )
return self . playlist_result ( url_results , playlist_id , title )
def _extract_playlist ( self , playlist_id ) :
def _extract_playlist ( self , playlist_id ) :
@ -2902,6 +2946,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
' uploader_id ' : uploader_id ,
' uploader_id ' : uploader_id ,
' uploader_url ' : uploader_url ,
' uploader_url ' : uploader_url ,
} )
} )
if playlist_id . startswith ( self . _YTM_PLAYLIST_PREFIX ) :
playlist . update ( self . _YTM_CHANNEL_INFO )
return has_videos , playlist
return has_videos , playlist
@ -2932,7 +2978,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return video
return video
if playlist_id . startswith ( ( ' RD ' , ' UL ' , ' PU ' ) ) :
if playlist_id . startswith ( ( ' RD ' , ' UL ' , ' PU ' ) ) :
# Mixes require a custom extraction process
if not playlist_id . startswith ( self . _YTM_PLAYLIST_PREFIX ) :
# Mixes require a custom extraction process,
# Youtube Music playlists act like normal playlists (with randomized order)
return self . _extract_mix ( playlist_id )
return self . _extract_mix ( playlist_id )
has_videos , playlist = self . _extract_playlist ( playlist_id )
has_videos , playlist = self . _extract_playlist ( playlist_id )