@ -34,6 +34,7 @@ from ...utils import (
clean_html ,
datetime_from_str ,
filesize_from_tbr ,
filter_dict ,
float_or_none ,
format_field ,
get_first ,
@ -1986,12 +1987,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
assert os . path . basename ( func_id ) == func_id
self . write_debug ( f ' Extracting signature function { func_id } ' )
cache_spec , code = self . cache . load ( ' youtube-sigfuncs ' , func_id ), None
cache_spec , code = self . cache . load ( ' youtube-sigfuncs ' , func_id , min_ver = ' 2025.03.27 ' ), None
if not cache_spec :
code = self . _load_player ( video_id , player_url )
if code :
res = self . _parse_sig_js ( code )
res = self . _parse_sig_js ( code , player_url )
test_string = ' ' . join ( map ( chr , range ( len ( example_sig ) ) ) )
cache_spec = [ ord ( c ) for c in res ( test_string ) ]
self . cache . store ( ' youtube-sigfuncs ' , func_id , cache_spec )
@ -2039,7 +2040,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f ' return { expr_code } \n ' )
self . to_screen ( ' Extracted signature function: \n ' + code )
def _parse_sig_js ( self , jscode ):
def _parse_sig_js ( self , jscode , player_url ):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
@ -2063,12 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r ' \ bc \ s*&& \ s*[a-zA-Z0-9]+ \ .set \ ([^,]+ \ s*, \ s* \ ([^)]* \ ) \ s* \ ( \ s*(?P<sig>[a-zA-Z0-9$]+) \ ( ' ) ,
jscode , ' Initial JS player signature function name ' , group = ' sig ' )
varname , global_list = self . _interpret_player_js_global_var ( jscode , player_url )
jsi = JSInterpreter ( jscode )
global_var_map = { }
_ , varname , value = self . _extract_player_js_global_var ( jscode )
if varname :
global_var_map [ varname ] = jsi . interpret_expression ( value , { } , allow_recursion = 100 )
initial_function = jsi . extract_function ( funcname , global_var_map )
initial_function = jsi . extract_function ( funcname , filter_dict ( { varname : global_list } ) )
return lambda s : initial_function ( [ s ] )
def _cached ( self , func , * cache_id ) :
@ -2093,7 +2091,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code := self . _player_cache . get ( cache_id ) :
return func_code
func_code = self . cache . load ( ' youtube-nsig ' , player_id , min_ver = ' 2025.03.2 6 ' )
func_code = self . cache . load ( ' youtube-nsig ' , player_id , min_ver = ' 2025.03.2 7 ' )
if func_code :
self . _player_cache [ cache_id ] = func_code
@ -2150,6 +2148,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
def _extract_n_function_name ( self , jscode , player_url = None ) :
varname , global_list = self . _interpret_player_js_global_var ( jscode , player_url )
if debug_str := traverse_obj ( global_list , ( lambda _ , v : v . endswith ( ' _w8_ ' ) , any ) ) :
funcname = self . _search_regex (
r ''' (?xs)
[ ; \n ] ( ? :
( ? P < f > function \s + ) |
( ? : var \s + ) ?
) ( ? P < funcname > [ a - zA - Z0 - 9 _ $ ] + ) \s * ( ? ( f ) | = \s * function \s * )
\( ( ? P < argname > [ a - zA - Z0 - 9 _ $ ] + ) \) \s * \{
( ? : ( ? ! \} [ ; \n ] ) . ) +
\} \s * catch \( \s * [ a - zA - Z0 - 9 _ $ ] + \s * \) \s *
\{ \s * return \s + % s \[ % d \] \s * \+ \s * ( ? P = argname ) \s * \} \s * return \s + [ ^ } ] + \} [ ; \n ]
''' % (re.escape(varname), global_list.index(debug_str)),
jscode , ' nsig function name ' , group = ' funcname ' , default = None )
if funcname :
return funcname
self . write_debug ( join_nonempty (
' Initial search was unable to find nsig function name ' ,
player_url and f ' player = { player_url } ' , delim = ' \n ' ) , only_once = True )
# Examples (with placeholders nfunc, narray, idx):
# * .get("n"))&&(b=nfunc(b)
# * .get("n"))&&(b=narray[idx](b)
@ -2179,7 +2197,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not funcname :
self . report_warning ( join_nonempty (
' Falling back to generic n function search ' ,
player_url and f ' player = { player_url } ' , delim = ' \n ' ) )
player_url and f ' player = { player_url } ' , delim = ' \n ' ) , only_once = True )
return self . _search_regex (
r ''' (?xs)
; \s * ( ? P < name > [ a - zA - Z0 - 9 _ $ ] + ) \s * = \s * function \( [ a - zA - Z0 - 9 _ $ ] + \)
@ -2192,9 +2210,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
rf ' var { re . escape ( funcname ) } \ s*= \ s*( \ [.+? \ ]) \ s*[,;] ' , jscode ,
f ' Initial JS player n function list ( { funcname } . { idx } ) ' ) ) ) [ int ( idx ) ]
def _extract_player_js_global_var ( self , jscode ):
def _extract_player_js_global_var ( self , jscode , player_url ):
""" Returns tuple of strings: variable assignment code, variable name, variable value code """
return self . _search_regex (
extract_global_var = self . _cached ( self . _search_regex , ' js global array ' , player_url )
varcode , varname , varvalue = extract_global_var (
r ''' (?x)
( ? P < q1 > [ " \' ])use \ s+strict(?P=q1); \ s*
( ? P < code >
@ -2206,17 +2225,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
) [ ; , ]
''' , jscode, ' global variable ' , group=( ' code ' , ' name ' , ' value ' ), default=(None, None, None))
def _fixup_n_function_code ( self , argnames , code , full_code ) :
global_var , varname , _ = self . _extract_player_js_global_var ( full_code )
if global_var :
self . write_debug ( f ' Prepending n function code with global array variable " { varname } " ' )
code = global_var + ' ; ' + code
if not varcode :
self . write_debug ( join_nonempty (
' No global array variable found in player JS ' ,
player_url and f ' player = { player_url } ' , delim = ' \n ' ) , only_once = True )
return varcode , varname , varvalue
def _interpret_player_js_global_var ( self , jscode , player_url ) :
""" Returns tuple of: variable name string, variable value list """
_ , varname , array_code = self . _extract_player_js_global_var ( jscode , player_url )
jsi = JSInterpreter ( array_code )
interpret_global_var = self . _cached ( jsi . interpret_expression , ' js global list ' , player_url )
return varname , interpret_global_var ( array_code , { } , allow_recursion = 10 )
def _fixup_n_function_code ( self , argnames , nsig_code , jscode , player_url ) :
varcode , varname , _ = self . _extract_player_js_global_var ( jscode , player_url )
if varcode and varname :
nsig_code = varcode + ' ; ' + nsig_code
_ , global_list = self . _interpret_player_js_global_var ( jscode , player_url )
else :
self . write_debug ( ' No global array variable found in player JS ' )
return argnames , re . sub (
rf ' ; \ s*if \ s* \ ( \ s*typeof \ s+[a-zA-Z0-9_$]+ \ s*===? \ s*(?:([ " \' ])undefined \ 1| { varname } \ [ \ d+ \ ]) \ s* \ ) \ s*return \ s+ { argnames [ 0 ] } ; ' ,
' ; ' , code )
varname = ' dlp_wins '
global_list = [ ]
undefined_idx = global_list . index ( ' undefined ' ) if ' undefined ' in global_list else r ' \ d+ '
fixed_code = re . sub (
rf ''' (?x)
; \s * if \s * \( \s * typeof \s + [ a - zA - Z0 - 9 _ $ ] + \s * == = ? \s * ( ? :
( [ " \' ])undefined \1 |
{ re . escape ( varname ) } \[ { undefined_idx } \]
) \s * \) \s * return \s + { re . escape ( argnames [ 0 ] ) } ;
''' , ' ; ' , nsig_code)
if fixed_code == nsig_code :
self . write_debug ( join_nonempty (
' No typeof statement found in nsig function code ' ,
player_url and f ' player = { player_url } ' , delim = ' \n ' ) , only_once = True )
return argnames , fixed_code
def _extract_n_function_code ( self , video_id , player_url ) :
player_id = self . _extract_player_info ( player_url )
@ -2230,7 +2273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self . _extract_n_function_name ( jscode , player_url = player_url )
# XXX: Workaround for the global array variable and lack of `typeof` implementation
func_code = self . _fixup_n_function_code ( * jsi . extract_function_code ( func_name ) , jscode )
func_code = self . _fixup_n_function_code ( * jsi . extract_function_code ( func_name ) , jscode , player_url )
return jsi , player_id , func_code