|
|
|
@ -1588,15 +1588,13 @@ class InfoExtractor:
|
|
|
|
|
webpage, 'next.js data', fatal=fatal, **kw),
|
|
|
|
|
video_id, transform_source=transform_source, fatal=fatal)
|
|
|
|
|
|
|
|
|
|
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', return_full_data=False):
|
|
|
|
|
''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
|
|
|
|
|
# not all website do this, but it can be changed
|
|
|
|
|
# https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
|
|
|
|
|
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
|
|
|
|
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
|
|
|
|
rectx = re.escape(context_name)
|
|
|
|
|
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
|
|
|
|
|
js, arg_keys, arg_vals = self._search_regex(
|
|
|
|
|
(r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
|
|
|
|
|
r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
|
|
|
|
|
webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
|
|
|
|
|
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
|
|
|
|
|
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal)
|
|
|
|
|
|
|
|
|
|
args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
|
|
|
|
|
|
|
|
|
@ -1604,10 +1602,8 @@ class InfoExtractor:
|
|
|
|
|
if val in ('undefined', 'void 0'):
|
|
|
|
|
args[key] = 'null'
|
|
|
|
|
|
|
|
|
|
ret = self._parse_json(js_to_json(js, args), video_id)
|
|
|
|
|
if return_full_data:
|
|
|
|
|
return ret
|
|
|
|
|
return ret['data'][0]
|
|
|
|
|
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
|
|
|
|
return traverse_obj(ret, traverse) or {}
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _hidden_inputs(html):
|
|
|
|
|