Merge ecab7b56f3 into 74e90dd9b8

2 days ago · 1e2d5e4d9f
parent 74e90dd9b8 ecab7b56f3
commit 1e2d5e4d9f
4 changed files with 77 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -301,6 +301,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                    error occurs (Alias: --no-ignore-errors)
    --dump-user-agent               Display the current user-agent and exit
    --list-extractors               List all supported extractors and exit
+    --list-extractors-json          List all supported extractors in JSON and exit
    --extractor-descriptions        Output descriptions of all supported
                                    extractors and exit
    --use-extractors NAMES          Extractor names to use separated by commas.
@ -1874,7 +1875,21 @@ The following extractors use this feature:

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

-
+# EXTRACTOR INFO JSON
+The `--list-extractors-json` option outputs information about the extractors, formatted as JSON. If one or more URLs are specified, only the extractors matching at least one URL are listed. If no URL is specified, all extractors are listed. The generic extractor is always the last in the list.
+
+### List of values returned
+key         | type            | description
+:------------|:----------------|:----------------------------
+index        | int             | index in list, starting from 0  
+name         | string          | name of the extractor
+desc         | string          | description of the extractor
+working      | bool            | true if the extractor is working
+enabled      | bool            | true if the extractor is enabled
+return_type  | string          | type of data returned by the extractor ("video", "playlist" or "any"); an empty string or `null` when unknown
+regex_urls   | array of string | list of regular expressions used by the extractor to match a URL
+matched_urls | array of string | list of URLs passed on the command line that matched the given extractor. Present only if at least one URL is specified.
+ 
 # PLUGINS

 Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**
--- a/yt_dlp/init.py
+++ b/yt_dlp/init.py
@ -13,6 +13,7 @@ import optparse
 import os
 import re
 import traceback
+import json

 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
@ -118,6 +119,45 @@ def print_extractor_information(opts, urls):
        out = 'Supported TV Providers:\n{}\n'.format(render_table(
            ['mso', 'mso name'],
            [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
+    elif opts.list_extractors_json:
+        from .extractor.generic import GenericIE
+        dicts = []
+        e_index = 0
+        urls = dict.fromkeys(urls, False)
+        if len(urls):
+            for ie in gen_extractors():
+                if ie == GenericIE:
+                    matched_urls = [url for url, matched in urls.items() if not matched]
+                else:
+                    matched_urls = tuple(filter(ie.suitable, urls.keys()))
+                    urls.update(dict.fromkeys(matched_urls, True))
+                # show only extractor with matched URL
+                if len(matched_urls):
+                    data = {'index': e_index,
+                            'name': ie.IE_NAME,
+                            'desc': ie.IE_DESC if ie.IE_DESC else '',
+                            'working': ie.working(),
+                            'enabled': ie.is_enabled(),
+                            'return_type': ie.return_type(),
+                            'regex_urls': ie.list_regex_url(),
+                            'matched_urls': matched_urls,
+                            }
+                    e_index += 1
+                    dicts.append(data)
+        else:
+            # show all extractors
+            for ie in gen_extractors():
+                data = {'index': e_index,
+                        'name': ie.IE_NAME,
+                        'desc': ie.IE_DESC if ie.IE_DESC else '',
+                        'working': ie.working(),
+                        'enabled': ie.is_enabled(),
+                        'return_type': ie.return_type(),
+                        'regex_urls': ie.list_regex_url(),
+                        }
+                dicts.append(data)
+                e_index += 1
+        out = json.dumps(dicts, indent=4)
    else:
        return False
    write_string(out, out=sys.stdout)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -13,6 +13,7 @@ import netrc
 import os
 import random
 import re
+import string
 import subprocess
 import sys
 import time
@ -616,6 +617,22 @@ class InfoExtractor:
        # so that lazy_extractors works correctly
        return cls._match_valid_url(url) is not None

+    @classmethod
+    def list_regex_url(cls):
+        return cls._VALID_URL if type(cls._VALID_URL) in [list, tuple] \
+            else (cls._VALID_URL.translate({ord(c): None for c in string.whitespace}),) if type(cls._VALID_URL) is str \
+            else []
+
+    @classmethod
+    def return_type(cls):
+        if '_RETURN_TYPE' not in cls.__dict__:
+            return ''
+        return cls._RETURN_TYPE
+
+    @classmethod
+    def is_enabled(cls):
+        return cls._ENABLED
+
    @classmethod
    def _match_id(cls, url):
        return cls._match_valid_url(url).group('id')
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -362,6 +362,10 @@ def create_parser():
        '--list-extractors',
        action='store_true', dest='list_extractors', default=False,
        help='List all supported extractors and exit')
+    general.add_option(
+        '--list-extractors-json',
+        action='store_true', dest='list_extractors_json', default=False,
+        help='List all supported extractors parameters in JSON format and exit')
    general.add_option(
        '--extractor-descriptions',
        action='store_true', dest='list_extractor_descriptions', default=False,