mirror of https://github.com/yt-dlp/yt-dlp
[extractor] Use classmethod/property where possible
and refactor lazy extractors accordingly. This reduces the need to create extractor instances
pull/3723/head
parent
7ddbf09c25
commit
82d020804d
@ -1,30 +1,28 @@
|
||||
import importlib
|
||||
import random
|
||||
import re
|
||||
|
||||
from ..utils import bug_reports_message, write_string
|
||||
from ..utils import bug_reports_message, classproperty, write_string
|
||||
|
||||
|
||||
class LazyLoadMetaClass(type):
|
||||
def __getattr__(cls, name):
|
||||
if '_real_class' not in cls.__dict__:
|
||||
# "is_suitable" requires "_TESTS". However, they bloat the lazy_extractors
|
||||
if '_real_class' not in cls.__dict__ and name not in ('is_suitable', 'get_testcases'):
|
||||
write_string(
|
||||
'WARNING: Falling back to normal extractor since lazy extractor '
|
||||
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
|
||||
return getattr(cls._get_real_class(), name)
|
||||
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
|
||||
return getattr(cls.real_class, name)
|
||||
|
||||
|
||||
class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
|
||||
_module = None
|
||||
_WORKING = True
|
||||
|
||||
@classmethod
|
||||
def _get_real_class(cls):
|
||||
@classproperty
|
||||
def real_class(cls):
|
||||
if '_real_class' not in cls.__dict__:
|
||||
mod = __import__(cls._module, fromlist=(cls.__name__,))
|
||||
cls._real_class = getattr(mod, cls.__name__)
|
||||
cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
|
||||
return cls._real_class
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
real_cls = cls._get_real_class()
|
||||
instance = real_cls.__new__(real_cls)
|
||||
instance = cls.real_class.__new__(cls.real_class)
|
||||
instance.__init__(*args, **kwargs)
|
||||
return instance
|
||||
|
@ -1,101 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import optparse
|
||||
import sys
|
||||
from inspect import getsource
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
|
||||
|
||||
NO_ATTR = object()
|
||||
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE']
|
||||
CLASS_METHODS = [
|
||||
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id',
|
||||
]
|
||||
IE_TEMPLATE = '''
|
||||
class {name}({bases}):
|
||||
_module = {module!r}
|
||||
'''
|
||||
with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
|
||||
MODULE_TEMPLATE = f.read()
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
|
||||
args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
|
||||
if len(args) != 1:
|
||||
parser.error('Expected only an output filename')
|
||||
|
||||
lazy_extractors_filename = args[0]
|
||||
if os.path.exists(lazy_extractors_filename):
|
||||
os.remove(lazy_extractors_filename)
|
||||
|
||||
# Block plugins from loading
|
||||
plugins_dirname = 'ytdlp_plugins'
|
||||
plugins_blocked_dirname = 'ytdlp_plugins_blocked'
|
||||
if os.path.exists(plugins_dirname):
|
||||
os.rename(plugins_dirname, plugins_blocked_dirname)
|
||||
_ALL_CLASSES = get_all_ies() # Must be before import
|
||||
|
||||
from yt_dlp.extractor import _ALL_CLASSES
|
||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
|
||||
if os.path.exists(plugins_blocked_dirname):
|
||||
os.rename(plugins_blocked_dirname, plugins_dirname)
|
||||
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
|
||||
module_src = '\n'.join((
|
||||
MODULE_TEMPLATE,
|
||||
' _module = None',
|
||||
*extra_ie_code(DummyInfoExtractor),
|
||||
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||
*build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
|
||||
))
|
||||
|
||||
with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
|
||||
module_template = f.read()
|
||||
with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
||||
f.write(f'{module_src}\n')
|
||||
|
||||
CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
|
||||
module_contents = [
|
||||
module_template,
|
||||
*[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
|
||||
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
||||
|
||||
ie_template = '''
|
||||
class {name}({bases}):
|
||||
_module = '{module}'
|
||||
'''
|
||||
def get_all_ies():
    """Import ``_ALL_CLASSES`` from ``yt_dlp.extractor`` with plugins hidden.

    While the import runs, the ``ytdlp_plugins`` directory (looked up
    relative to the current working directory) is renamed out of the way
    when it exists. It is renamed back afterwards, even if the import
    raises.

    Returns:
        The ``_ALL_CLASSES`` object exported by ``yt_dlp.extractor``.
    """
    plugins_dir = 'ytdlp_plugins'
    hidden_dir = f'{plugins_dir}_blocked'

    # Move the plugin directory aside for the duration of the import.
    if os.path.exists(plugins_dir):
        os.rename(plugins_dir, hidden_dir)

    try:
        from yt_dlp.extractor import _ALL_CLASSES as all_classes
    finally:
        # Always restore the directory, whether or not the import succeeded.
        if os.path.exists(hidden_dir):
            os.rename(hidden_dir, plugins_dir)

    return all_classes
|
||||
|
||||
|
||||
def get_base_name(base):
    """Return the class name to use for ``base`` in a generated bases list.

    ``InfoExtractor`` and ``SearchInfoExtractor`` (compared by identity) are
    replaced by the names of their lazy counterparts; any other class keeps
    its own ``__name__``.
    """
    if base is InfoExtractor:
        return 'LazyLoadExtractor'
    if base is SearchInfoExtractor:
        return 'LazyLoadSearchExtractor'
    return base.__name__
|
||||
def extra_ie_code(ie, base=None):
    """Yield source lines for the attributes and methods ``ie`` needs.

    For every name in ``STATIC_CLASS_PROPERTIES`` an assignment line is
    yielded when the value on ``ie`` differs from the value on ``base``
    (or from the ``NO_ATTR`` sentinel when no ``base`` is given). An empty
    string is then yielded, followed by the source of every method named in
    ``CLASS_METHODS`` whose underlying function differs from the one on
    ``base`` (all of them when ``base`` is falsy).

    Args:
        ie: The extractor class to inspect.
        base: Optional class whose attributes/methods count as inherited.

    Yields:
        str: Assignment lines, one empty separator, then method sources.
    """
    for attr in STATIC_CLASS_PROPERTIES:
        current = getattr(ie, attr)
        inherited = getattr(base, attr) if base else NO_ATTR
        if current != inherited:
            # NOTE(review): the leading whitespace of this literal has to match
            # the indentation of the generated class body - confirm it was not
            # collapsed in this view of the file.
            yield f' {attr} = {current!r}'
    yield ''

    for method_name in CLASS_METHODS:
        method = getattr(ie, method_name)
        # Without a base every method is emitted; otherwise only overrides.
        overridden = not base or method.__func__ != getattr(base, method_name).__func__
        if overridden:
            yield getsource(method)
|
||||
|
||||
def build_lazy_ie(ie, name):
    """Build the source text of the lazy class definition for ``ie``.

    The text starts with ``ie_template`` filled in with ``name``, the mapped
    base-class names and ``ie.__module__``. It is then extended with:

    * a ``_VALID_URL`` assignment, taken from ``ie._VALID_URL`` or, when that
      is missing/falsy and ``ie`` has ``_make_valid_url``, from calling it;
    * a ``_WORKING = False`` line when ``ie._WORKING`` is falsy;
    * the source of ``ie.suitable`` when its underlying function is not
      ``InfoExtractor.suitable``'s.

    Args:
        ie: The extractor class to describe.
        name: The class name to use in the generated code.

    Returns:
        str: The generated class source.
    """
    base_names = ', '.join(map(get_base_name, ie.__bases__))
    pieces = [ie_template.format(name=name, bases=base_names, module=ie.__module__)]

    valid_url = getattr(ie, '_VALID_URL', None)
    if not valid_url and hasattr(ie, '_make_valid_url'):
        valid_url = ie._make_valid_url()
    # NOTE(review): the leading whitespace of the two attribute literals below
    # has to match the generated class body's indentation - confirm it was not
    # collapsed in this view of the file.
    if valid_url:
        pieces.append(f' _VALID_URL = {valid_url!r}\n')

    if not ie._WORKING:
        pieces.append(' _WORKING = False\n')

    # Only copy ``suitable`` when the extractor overrides the default one.
    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
        pieces.append(f'\n{getsource(ie.suitable)}')

    return ''.join(pieces)
|
||||
|
||||
def build_ies(ies, bases, attr_base):
|
||||
names = []
|
||||
for ie in sort_ies(ies, bases):
|
||||
yield build_lazy_ie(ie, ie.__name__, attr_base)
|
||||
if ie in ies:
|
||||
names.append(ie.__name__)
|
||||
|
||||
# find the correct sorting and add the required base classes so that subclasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
ordered_cls = []
|
||||
yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
|
||||
|
||||
|
||||
def sort_ies(ies, ignored_bases):
|
||||
"""find the correct sorting and add the required base classes so that subclasses can be correctly created"""
|
||||
classes, returned_classes = ies[:-1], set()
|
||||
assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
|
||||
while classes:
|
||||
for c in classes[:]:
|
||||
bases = set(c.__bases__) - {object, InfoExtractor, SearchInfoExtractor}
|
||||
stop = False
|
||||
bases = set(c.__bases__) - {object, *ignored_bases}
|
||||
restart = False
|
||||
for b in bases:
|
||||
if b not in classes and b not in ordered_cls:
|
||||
if b.__name__ == 'GenericIE':
|
||||
exit()
|
||||
if b not in classes and b not in returned_classes:
|
||||
assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
|
||||
classes.insert(0, b)
|
||||
stop = True
|
||||
if stop:
|
||||
restart = True
|
||||
if restart:
|
||||
break
|
||||
if all(b in ordered_cls for b in bases):
|
||||
ordered_cls.append(c)
|
||||
if bases <= returned_classes:
|
||||
yield c
|
||||
returned_classes.add(c)
|
||||
classes.remove(c)
|
||||
break
|
||||
ordered_cls.append(_ALL_CLASSES[-1])
|
||||
yield ies[-1]
|
||||
|
||||
names = []
|
||||
for ie in ordered_cls:
|
||||
name = ie.__name__
|
||||
src = build_lazy_ie(ie, name)
|
||||
module_contents.append(src)
|
||||
if ie in _ALL_CLASSES:
|
||||
names.append(name)
|
||||
|
||||
module_contents.append(
|
||||
'\n_ALL_CLASSES = [{}]'.format(', '.join(names)))
|
||||
def build_lazy_ie(ie, name, attr_base):
|
||||
bases = ', '.join({
|
||||
'InfoExtractor': 'LazyLoadExtractor',
|
||||
'SearchInfoExtractor': 'LazyLoadSearchExtractor',
|
||||
}.get(base.__name__, base.__name__) for base in ie.__bases__)
|
||||
|
||||
module_src = '\n'.join(module_contents) + '\n'
|
||||
s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
|
||||
valid_url = getattr(ie, '_VALID_URL', None)
|
||||
if not valid_url and hasattr(ie, '_make_valid_url'):
|
||||
valid_url = ie._make_valid_url()
|
||||
if valid_url:
|
||||
s += f' _VALID_URL = {valid_url!r}\n'
|
||||
return s + '\n'.join(extra_ie_code(ie, attr_base))
|
||||
|
||||
with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
||||
f.write(module_src)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
Loading…
Reference in New Issue