|
|
@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
self.to_screen('Extracted signature function:\n' + code)
|
|
|
|
self.to_screen('Extracted signature function:\n' + code)
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_sig_js(self, jscode):
|
|
|
|
def _parse_sig_js(self, jscode):
|
|
|
|
|
|
|
|
# Examples where `sig` is funcname:
|
|
|
|
|
|
|
|
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
|
|
|
|
|
|
|
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
|
|
|
|
|
|
|
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
|
|
|
|
|
|
|
|
# sig=function(J){J=J.split(""); ... ;return J.join("")};
|
|
|
|
|
|
|
|
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
|
|
|
|
|
|
|
|
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
|
|
|
|
funcname = self._search_regex(
|
|
|
|
funcname = self._search_regex(
|
|
|
|
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
(r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
|
|
|
|
|
|
|
|
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
|
|
|
|
|
|
|
|
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
|
|
|
|
|
|
|
# Old patterns
|
|
|
|
|
|
|
|
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
|
|
|
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
|
|
|
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
|
|
|
|
|
|
|
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
|
|
|
|
|
|
|
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
|
|
|
|
|
|
|
# Obsolete patterns
|
|
|
|
# Obsolete patterns
|
|
|
|
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
|
|
|
|
|
|
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
|
|
|
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
|
|
|
jscode, 'Initial JS player signature function name', group='sig')
|
|
|
|
jscode, 'Initial JS player signature function name', group='sig')
|
|
|
|
|
|
|
|
|
|
|
@ -1658,6 +1665,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_n_function_name(self, jscode):
|
|
|
|
def _extract_n_function_name(self, jscode):
|
|
|
|
func_name, idx = self._search_regex(
|
|
|
|
func_name, idx = self._search_regex(
|
|
|
|
|
|
|
|
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
|
|
|
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
|
|
|
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
|
|
|
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
|
|
|
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
|
|
|
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
|
|
|
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
|
|
@ -1666,7 +1674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
# older: (b=a.get("n"))&&(b=nfunc(b)
|
|
|
|
# older: (b=a.get("n"))&&(b=nfunc(b)
|
|
|
|
r'''(?x)
|
|
|
|
r'''(?x)
|
|
|
|
\((?:[\w$()\s]+,)*?\s* # (
|
|
|
|
\((?:[\w$()\s]+,)*?\s* # (
|
|
|
|
(?P<b>[a-z])\s*=\s* # b=
|
|
|
|
(?P<b>[a-zA-Z])\s*=\s* # b=, R=
|
|
|
|
(?:
|
|
|
|
(?:
|
|
|
|
(?: # expect ,c=a.get(b) (etc)
|
|
|
|
(?: # expect ,c=a.get(b) (etc)
|
|
|
|
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
|
|
|
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
|
|
@ -1679,7 +1687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
|
|
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
|
|
|
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
|
|
|
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
|
|
|
| # ,c=a.get(b)
|
|
|
|
| # ,c=a.get(b)
|
|
|
|
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
|
|
|
|
,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s*
|
|
|
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
|
|
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
|
|
|
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
|
|
|
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
|
|
|
)
|
|
|
|
)
|
|
|
@ -1697,15 +1705,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
r'''(?xs)
|
|
|
|
r'''(?xs)
|
|
|
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
|
|
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
|
|
|
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
|
|
|
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
|
|
|
\s*\{(?:(?!};).)+?["']enhanced_except_
|
|
|
|
\s*\{(?:(?!};).)+?(?:
|
|
|
|
|
|
|
|
["']enhanced_except_ |
|
|
|
|
|
|
|
|
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+
|
|
|
|
|
|
|
|
)
|
|
|
|
''', jscode, 'Initial JS player n function name', group='name')
|
|
|
|
''', jscode, 'Initial JS player n function name', group='name')
|
|
|
|
if not idx:
|
|
|
|
if not idx:
|
|
|
|
return func_name
|
|
|
|
return func_name
|
|
|
|
|
|
|
|
|
|
|
|
return self._parse_json(self._search_regex(
|
|
|
|
return self._search_json(
|
|
|
|
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
|
|
|
|
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
|
|
|
|
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
|
|
|
|
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
|
|
|
func_name, transform_source=js_to_json)[int(idx)]
|
|
|
|
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
|
|
|
|
|
|
|
|
transform_source=js_to_json)[int(idx)]
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_n_function_code(self, video_id, player_url):
|
|
|
|
def _extract_n_function_code(self, video_id, player_url):
|
|
|
|
player_id = self._extract_player_info(player_url)
|
|
|
|
player_id = self._extract_player_info(player_url)
|
|
|
@ -1734,7 +1746,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
|
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
|
|
|
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
|
|
|
|
|
|
|
|
|
|
|
if ret.startswith('enhanced_except_'):
|
|
|
|
if ret.startswith('enhanced_except_') or ret.endswith(s):
|
|
|
|
raise JSInterpreter.Exception('Signature function returned an exception')
|
|
|
|
raise JSInterpreter.Exception('Signature function returned an exception')
|
|
|
|
return ret
|
|
|
|
return ret
|
|
|
|
|
|
|
|
|
|
|
|