|
|
@ -929,39 +929,37 @@ class InfoExtractor:
|
|
|
|
|
|
|
|
|
|
|
|
return content
|
|
|
|
return content
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
|
|
|
def __print_error(self, errnote, fatal, video_id, err):
|
|
|
|
|
|
|
|
if fatal:
|
|
|
|
|
|
|
|
raise ExtractorError(f'{video_id}: {errnote} ', cause=err)
|
|
|
|
|
|
|
|
elif errnote:
|
|
|
|
|
|
|
|
self.report_warning(f'{video_id}: {errnote} {err}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None):
|
|
|
|
if transform_source:
|
|
|
|
if transform_source:
|
|
|
|
xml_string = transform_source(xml_string)
|
|
|
|
xml_string = transform_source(xml_string)
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
|
|
|
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
|
|
|
except xml.etree.ElementTree.ParseError as ve:
|
|
|
|
except xml.etree.ElementTree.ParseError as ve:
|
|
|
|
errmsg = '%s: Failed to parse XML ' % video_id
|
|
|
|
self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
|
|
|
|
if fatal:
|
|
|
|
|
|
|
|
raise ExtractorError(errmsg, cause=ve)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
self.report_warning(errmsg + str(ve))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs):
|
|
|
|
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs):
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
return json.loads(
|
|
|
|
return json.loads(
|
|
|
|
json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
|
|
|
|
json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
|
|
|
|
except ValueError as ve:
|
|
|
|
except ValueError as ve:
|
|
|
|
errmsg = f'{video_id}: Failed to parse JSON'
|
|
|
|
self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve)
|
|
|
|
if fatal:
|
|
|
|
|
|
|
|
raise ExtractorError(errmsg, cause=ve)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
self.report_warning(f'{errmsg}: {ve}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
|
|
|
|
def _parse_socket_response_as_json(self, data, *args, **kwargs):
|
|
|
|
return self._parse_json(
|
|
|
|
return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
|
|
|
|
data[data.find('{'):data.rfind('}') + 1],
|
|
|
|
|
|
|
|
video_id, transform_source, fatal)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __create_download_methods(name, parser, note, errnote, return_value):
|
|
|
|
def __create_download_methods(name, parser, note, errnote, return_value):
|
|
|
|
|
|
|
|
|
|
|
|
def parse(ie, content, *args, **kwargs):
|
|
|
|
def parse(ie, content, *args, errnote=errnote, **kwargs):
|
|
|
|
if parser is None:
|
|
|
|
if parser is None:
|
|
|
|
return content
|
|
|
|
return content
|
|
|
|
|
|
|
|
if errnote is False:
|
|
|
|
|
|
|
|
kwargs['errnote'] = errnote
|
|
|
|
# parser is fetched by name so subclasses can override it
|
|
|
|
# parser is fetched by name so subclasses can override it
|
|
|
|
return getattr(ie, parser)(content, *args, **kwargs)
|
|
|
|
return getattr(ie, parser)(content, *args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
@ -973,7 +971,7 @@ class InfoExtractor:
|
|
|
|
if res is False:
|
|
|
|
if res is False:
|
|
|
|
return res
|
|
|
|
return res
|
|
|
|
content, urlh = res
|
|
|
|
content, urlh = res
|
|
|
|
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh
|
|
|
|
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
|
|
|
|
|
|
|
|
|
|
|
|
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
|
|
|
|
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
|
|
|
|
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
|
|
|
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
|
|
@ -988,7 +986,7 @@ class InfoExtractor:
|
|
|
|
self.report_warning(f'Unable to load request from disk: {e}')
|
|
|
|
self.report_warning(f'Unable to load request from disk: {e}')
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
|
|
|
|
content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
|
|
|
|
return parse(self, content, video_id, transform_source, fatal)
|
|
|
|
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote)
|
|
|
|
kwargs = {
|
|
|
|
kwargs = {
|
|
|
|
'note': note,
|
|
|
|
'note': note,
|
|
|
|
'errnote': errnote,
|
|
|
|
'errnote': errnote,
|
|
|
|