@ -27,6 +27,8 @@ from ..utils import (
dict_get ,
dict_get ,
error_to_compat_str ,
error_to_compat_str ,
float_or_none ,
float_or_none ,
extract_attributes ,
get_element_by_attribute ,
int_or_none ,
int_or_none ,
js_to_json ,
js_to_json ,
mimetype2ext ,
mimetype2ext ,
@ -38,6 +40,7 @@ from ..utils import (
smuggle_url ,
smuggle_url ,
str_or_none ,
str_or_none ,
str_to_int ,
str_to_int ,
traverse_obj ,
try_get ,
try_get ,
unescapeHTML ,
unescapeHTML ,
unified_strdate ,
unified_strdate ,
@ -656,6 +659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' description ' : ' md5:bf77e03fcae5529475e500129b05668a ' ,
' description ' : ' md5:bf77e03fcae5529475e500129b05668a ' ,
' duration ' : 177 ,
' duration ' : 177 ,
' uploader ' : ' FlyingKitty ' ,
' uploader ' : ' FlyingKitty ' ,
' uploader_id ' : ' FlyingKitty900 ' ,
' upload_date ' : ' 20200408 ' ,
' upload_date ' : ' 20200408 ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg ' ,
' age_limit ' : 18 ,
' age_limit ' : 18 ,
@ -678,6 +682,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' description ' : ' md5:17eccca93a786d51bc67646756894066 ' ,
' description ' : ' md5:17eccca93a786d51bc67646756894066 ' ,
' duration ' : 106 ,
' duration ' : 106 ,
' uploader ' : ' Projekt Melody ' ,
' uploader ' : ' Projekt Melody ' ,
' uploader_id ' : ' UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' upload_date ' : ' 20191227 ' ,
' upload_date ' : ' 20191227 ' ,
' age_limit ' : 18 ,
' age_limit ' : 18 ,
' thumbnail ' : ' https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg ' ,
@ -929,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' id ' : ' lsguqyKfVQg ' ,
' id ' : ' lsguqyKfVQg ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' { dark walk}; Loki/AC/Dishonored; collab w/Elflover21 ' ,
' title ' : ' { dark walk}; Loki/AC/Dishonored; collab w/Elflover21 ' ,
' alt_title ' : ' Dark Walk - Position Music ' ,
' alt_title ' : ' Dark Walk ' ,
' description ' : ' md5:8085699c11dc3f597ce0410b0dcbb34a ' ,
' description ' : ' md5:8085699c11dc3f597ce0410b0dcbb34a ' ,
' duration ' : 133 ,
' duration ' : 133 ,
' upload_date ' : ' 20151119 ' ,
' upload_date ' : ' 20151119 ' ,
' uploader_id ' : ' IronSoulElf ' ,
' uploader_id ' : ' IronSoulElf ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/IronSoulElf ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/IronSoulElf ' ,
' uploader ' : ' IronSoulElf ' ,
' uploader ' : ' IronSoulElf ' ,
' creator ' : ' Todd Haberman, Daniel Law Heath and Aaron Kaplan' ,
' creator ' : r' re:Todd Haberman[;,] \ s+ Daniel Law Heath and Aaron Kaplan' ,
' track ' : ' Dark Walk - Position Music ' ,
' track ' : ' Dark Walk ' ,
' artist ' : ' Todd Haberman, Daniel Law Heath and Aaron Kaplan' ,
' artist ' : r' re:Todd Haberman[;,] \ s+ Daniel Law Heath and Aaron Kaplan' ,
' album ' : ' Position Music - Production Music Vol. 143 - Dark Walk ' ,
' album ' : ' Position Music - Production Music Vol. 143 - Dark Walk ' ,
} ,
} ,
' params ' : {
' params ' : {
@ -2091,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or microformat . get ( ' lengthSeconds ' ) ) \
or microformat . get ( ' lengthSeconds ' ) ) \
or parse_duration ( search_meta ( ' duration ' ) )
or parse_duration ( search_meta ( ' duration ' ) )
is_live = video_details . get ( ' isLive ' )
is_live = video_details . get ( ' isLive ' )
owner_profile_url = microformat . get ( ' ownerProfileUrl ' )
def gen_owner_profile_url():
    """Lazily yield candidate owner profile URLs in order of preference.

    Being a generator, the webpage is only scanned when the consumer below
    did not accept the first candidate.
    """
    # 1. value already present in the microformat data
    yield microformat.get('ownerProfileUrl')
    # 2. href of the <link itemprop="url" ...> tag found inside the
    #    itemprop="author" element of the page.
    #    Either the page or the author element may be missing; fall back
    #    to '' so that the regex search always receives a string rather
    #    than raising TypeError on None.
    author_html = get_element_by_attribute(
        'itemprop', 'author', webpage or '') or ''
    yield extract_attributes(self._search_regex(
        r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
        author_html, 'owner_profile_url', default='')).get('href')


# first candidate that url_or_none() accepts, or None if there is none
owner_profile_url = next(
    (x for x in map(url_or_none, gen_owner_profile_url()) if x),
    None)
if not player_url :
if not player_url :
player_url = self . _extract_player_url ( webpage )
player_url = self . _extract_player_url ( webpage )
@ -2176,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info [ d_k ] = parse_duration ( query [ k ] [ 0 ] )
info [ d_k ] = parse_duration ( query [ k ] [ 0 ] )
if video_description :
if video_description :
# Youtube Music Auto-generated description
mobj = re . search ( r ' (?s)(?P<track>[^· \ n]+)·(?P<artist>[^ \ n]+) \ n+(?P<album>[^ \ n]+)(?:.+?℗ \ s*(?P<release_year> \ d {4} )(?! \ d))?(?:.+?Released on \ s*: \ s*(?P<release_date> \ d {4} - \ d {2} - \ d {2} ))?(.+? \ nArtist \ s*: \ s*(?P<clean_artist>[^ \ n]+))?.+ \ nAuto-generated by YouTube \ . \ s*$ ' , video_description )
mobj = re . search ( r ' (?s)(?P<track>[^· \ n]+)·(?P<artist>[^ \ n]+) \ n+(?P<album>[^ \ n]+)(?:.+?℗ \ s*(?P<release_year> \ d {4} )(?! \ d))?(?:.+?Released on \ s*: \ s*(?P<release_date> \ d {4} - \ d {2} - \ d {2} ))?(.+? \ nArtist \ s*: \ s*(?P<clean_artist>[^ \ n]+))?.+ \ nAuto-generated by YouTube \ . \ s*$ ' , video_description )
if mobj :
if mobj :
release_year = mobj . group ( ' release_year ' )
release_year = mobj . group ( ' release_year ' )
@ -2250,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lambda x : x [ ' superTitleIcon ' ] [ ' iconType ' ] ) == ' LOCATION_PIN ' :
lambda x : x [ ' superTitleIcon ' ] [ ' iconType ' ] ) == ' LOCATION_PIN ' :
info [ ' location ' ] = stl
info [ ' location ' ] = stl
else :
else :
mobj = re . search ( r ' (.+?) \ s*S( \ d+) \ s*• \ s*E( \ d+) ' , stl )
# •? doesn't match, but [•]? does; \xa0 = non-breaking space
mobj = re . search ( r ' ([^ \ xa0 \ s].*?)[ \ xa0 \ s]*S( \ d+)[ \ xa0 \ s]*[•]?[ \ xa0 \ s]*E( \ d+) ' , stl )
if mobj :
if mobj :
info . update ( {
info . update ( {
' series ' : mobj . group ( 1 ) ,
' series ' : mobj . group ( 1 ) ,
@ -2261,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
vpir ,
vpir ,
lambda x : x [ ' videoActions ' ] [ ' menuRenderer ' ] [ ' topLevelButtons ' ] ,
lambda x : x [ ' videoActions ' ] [ ' menuRenderer ' ] [ ' topLevelButtons ' ] ,
list ) or [ ] ) :
list ) or [ ] ) :
tbr = t lb. get ( ' toggleButtonRenderer ' ) or { }
tbr = t raverse_obj( tlb , ( ' segmentedLikeDislikeButtonRenderer ' , ' likeButton ' , ' toggleButtonRenderer ' ) , ' toggleButtonRenderer ' ) or { }
for getter , regex in [ (
for getter , regex in [ (
lambda x : x [ ' defaultText ' ] [ ' accessibility ' ] [ ' accessibilityData ' ] ,
lambda x : x [ ' defaultText ' ] [ ' accessibility ' ] [ ' accessibilityData ' ] ,
r ' (?P<count>[ \ d,]+) \ s*(?P<type>(?:dis)?like) ' ) , ( [
r ' (?P<count>[ \ d,]+) \ s*(?P<type>(?:dis)?like) ' ) , ( [
@ -2315,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif mrr_title == ' Song ' :
elif mrr_title == ' Song ' :
info [ ' track ' ] = mrr_contents_text
info [ ' track ' ] = mrr_contents_text
# Best-effort digging ("spelunking" rather than regular extraction):
# walk the engagement panels of initial_data down to the carousel
# lockups of the description's music section.
carousel_lockups = traverse_obj(
    initial_data,
    ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
     'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
     'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
    expected_type=dict) or []
# Row title -> info key; intended to reproduce the
# metadataRowContainerRenderer handling above (if that is still in use)
fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
# More than one lockup (multiple songs?): only the LICENSES row is used
# NOTE(review): presumably because album/artist/track would be ambiguous
# for several songs -- confirm
if len(carousel_lockups) > 1:
    fields = fields[-1:]
# Only the first lockup is inspected. The result is guarded with `or []`,
# like the lookup above, so that an empty/None result cannot break the loop.
for info_row in traverse_obj(
        carousel_lockups,
        (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
        expected_type=dict) or []:
    row_title = traverse_obj(info_row, ('title', 'simpleText'))
    # defaultMetadata and expandedMetadata are alternative sources for the
    # row's text, each passed through get_text
    row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
    if not row_text:
        continue
    for name, field in fields:
        if name == row_title:
            # never overwrite a value that was already extracted
            if not info.get(field):
                info[field] = row_text
            # titles in `fields` are unique, so no further entry can match
            break
for s_k , d_k in [ ( ' artist ' , ' creator ' ) , ( ' track ' , ' alt_title ' ) ] :
for s_k , d_k in [ ( ' artist ' , ' creator ' ) , ( ' track ' , ' alt_title ' ) ] :
v = info . get ( s_k )
v = info . get ( s_k )
if v :
if v :