From f20148e1d7ccfd1b6e2401cfb0ab78964a84bff6 Mon Sep 17 00:00:00 2001 From: wesson Date: Wed, 16 Oct 2024 13:26:12 +0200 Subject: [PATCH 1/3] Add option to list in json extractors matching URL or all extractors --- yt_dlp/__init__.py | 40 ++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 18 +++++++++++++++++ yt_dlp/options.py | 4 ++++ 3 files changed, 62 insertions(+) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b1..31fa1d7b54 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -13,6 +13,7 @@ import optparse import os import re import traceback +import json from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .downloader.external import get_external_downloader @@ -119,6 +120,45 @@ def print_extractor_information(opts, urls): out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) + elif opts.list_extractors_json: + from .extractor.generic import GenericIE + dicts = [] + e_index = 0 + urls = dict.fromkeys(urls, False) + if len(urls): + for ie in gen_extractors(): + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + # show only extractor with matched URL + if len(matched_urls): + data = {'index': e_index, + 'name': ie.IE_NAME, + 'desc': ie.IE_DESC if ie.IE_DESC else '', + 'working': ie.working(), + 'enabled': ie.is_enabled(), + 'return_type': ie.return_type(), + 'regex_url': ie.list_regex_url(), + 'matched_urls': matched_urls, + } + e_index += 1 + dicts.append(data) + else: + # show all extractors + for ie in gen_extractors(): + data = {'index': e_index, + 'name': ie.IE_NAME, + 'desc': ie.IE_DESC if ie.IE_DESC else '', + 'working': ie.working(), + 'enabled': ie.is_enabled(), + 'return_type': ie.return_type(), + 'regex_url': 
ie.list_regex_url(), + } + dicts.append(data) + e_index += 1 + out = json.dumps(dicts, indent=4) else: return False write_string(out, out=sys.stdout) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 92ddad2b76..85597fe8c6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -14,6 +14,7 @@ import netrc import os import random import re +import string import subprocess import sys import time @@ -610,6 +611,23 @@ class InfoExtractor: # so that lazy_extractors works correctly return cls._match_valid_url(url) is not None + + @classmethod + def list_regex_url(cls): + return cls._VALID_URL if type(cls._VALID_URL) in [list, tuple] \ + else (cls._VALID_URL.translate({ord(c): None for c in string.whitespace}),) if type(cls._VALID_URL) is str \ + else [] + + @classmethod + def return_type(cls): + if '_RETURN_TYPE' not in cls.__dict__: + return '' + return cls._RETURN_TYPE + + @classmethod + def is_enabled(cls): + return cls._ENABLED + @classmethod def _match_id(cls, url): return cls._match_valid_url(url).group('id') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 930d9d4bef..ba75beec4c 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -362,6 +362,10 @@ def create_parser(): '--list-extractors', action='store_true', dest='list_extractors', default=False, help='List all supported extractors and exit') + general.add_option( + '--list-extractors-json', + action='store_true', dest='list_extractors_json', default=False, + help='List all supported extractors parameters in JSON format and exit') general.add_option( '--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', default=False, From 25b73a440bbe8f672aac4c337b851b1a2e6c2442 Mon Sep 17 00:00:00 2001 From: wesson Date: Wed, 4 Dec 2024 22:22:42 +0100 Subject: [PATCH 2/3] document --list-extractors-json option and fix a minor typo --- README.md | 17 ++++++++++++++++- yt_dlp/__init__.py | 4 ++-- 2 files changed, 18 insertions(+), 3 
deletions(-) diff --git a/README.md b/README.md index 772395d24d..5808924316 100644 --- a/README.md +++ b/README.md @@ -302,6 +302,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git error occurs (Alias: --no-ignore-errors) --dump-user-agent Display the current user-agent and exit --list-extractors List all supported extractors and exit + --list-extractors-json List all supported extractors in JSON and exit --extractor-descriptions Output descriptions of all supported extractors and exit --use-extractors NAMES Extractor names to use separated by commas. @@ -1875,7 +1876,21 @@ The following extractors use this feature: - +# EXTRACTOR INFO JSON +The `--list-extractors-json` option outputs information about the extractor(s) formatted as JSON. If URL(s) are specified, only the extractors matching at least one URL are listed. If none is specified, all extractors are listed. The generic extractor is always the last in the list. + +### List of values returned +key | type | description +:------------|:----------------|:---------------------------- +index | int | index in list, starting from 0 +name | string | name of the extractor +desc | string | description of the extractor +working | bool | true if the extractor is working +enabled | bool | true if the extractor is enabled +return_type | string | type of data returned by the extractor ("video", "playlist", "any", or None) +regex_urls | array of string | list of regexes used by the extractor to match a given URL +matched_urls | array of string | list of URL(s) passed on the command line that matched the given extractor. Present only if URL(s) are specified. + # PLUGINS Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. 
**Use plugins at your own risk and only if you trust the code!** diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 31fa1d7b54..87f6faea39 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -140,7 +140,7 @@ def print_extractor_information(opts, urls): 'working': ie.working(), 'enabled': ie.is_enabled(), 'return_type': ie.return_type(), - 'regex_url': ie.list_regex_url(), + 'regex_urls': ie.list_regex_url(), 'matched_urls': matched_urls, } e_index += 1 @@ -154,7 +154,7 @@ def print_extractor_information(opts, urls): 'working': ie.working(), 'enabled': ie.is_enabled(), 'return_type': ie.return_type(), - 'regex_url': ie.list_regex_url(), + 'regex_urls': ie.list_regex_url(), } dicts.append(data) e_index += 1 From ecab7b56f37a7d1b3837bcd238a48264177f4d10 Mon Sep 17 00:00:00 2001 From: wesson Date: Wed, 4 Dec 2024 22:41:22 +0100 Subject: [PATCH 3/3] make autopep8 happy --- yt_dlp/__init__.py | 2 +- yt_dlp/extractor/common.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 87f6faea39..b741d429a0 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -142,7 +142,7 @@ def print_extractor_information(opts, urls): 'return_type': ie.return_type(), 'regex_urls': ie.list_regex_url(), 'matched_urls': matched_urls, - } + } e_index += 1 dicts.append(data) else: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 85597fe8c6..ca99c75760 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -611,7 +611,6 @@ class InfoExtractor: # so that lazy_extractors works correctly return cls._match_valid_url(url) is not None - @classmethod def list_regex_url(cls): return cls._VALID_URL if type(cls._VALID_URL) in [list, tuple] \