From f20148e1d7ccfd1b6e2401cfb0ab78964a84bff6 Mon Sep 17 00:00:00 2001
From: wesson <wesson@vsosoftware.fr>
Date: Wed, 16 Oct 2024 13:26:12 +0200
Subject: [PATCH 1/3] Add option to list in json extractors matching URL or all
 extractors

---
 yt_dlp/__init__.py         | 40 ++++++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/common.py | 18 +++++++++++++++++
 yt_dlp/options.py          |  4 ++++
 3 files changed, 62 insertions(+)

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 20111175b1..31fa1d7b54 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -13,6 +13,7 @@ import optparse
 import os
 import re
 import traceback
+import json
 
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
@@ -119,6 +120,45 @@ def print_extractor_information(opts, urls):
         out = 'Supported TV Providers:\n{}\n'.format(render_table(
             ['mso', 'mso name'],
             [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
+    elif opts.list_extractors_json:
+        from .extractor.generic import GenericIE
+        dicts = []
+        e_index = 0
+        urls = dict.fromkeys(urls, False)
+        if len(urls):
+            for ie in gen_extractors():
+                if ie == GenericIE:
+                    matched_urls = [url for url, matched in urls.items() if not matched]
+                else:
+                    matched_urls = tuple(filter(ie.suitable, urls.keys()))
+                    urls.update(dict.fromkeys(matched_urls, True))
+                # show only extractor with matched URL
+                if len(matched_urls):
+                    data = {'index': e_index,
+                            'name': ie.IE_NAME,
+                            'desc': ie.IE_DESC if ie.IE_DESC else '',
+                            'working': ie.working(),
+                            'enabled': ie.is_enabled(),
+                            'return_type': ie.return_type(),
+                            'regex_url': ie.list_regex_url(),
+                            'matched_urls': matched_urls,
+                    }
+                    e_index += 1
+                    dicts.append(data)
+        else:
+            # show all extractors
+            for ie in gen_extractors():
+                data = {'index': e_index,
+                        'name': ie.IE_NAME,
+                        'desc': ie.IE_DESC if ie.IE_DESC else '',
+                        'working': ie.working(),
+                        'enabled': ie.is_enabled(),
+                        'return_type': ie.return_type(),
+                        'regex_url': ie.list_regex_url(),
+                        }
+                dicts.append(data)
+                e_index += 1
+        out = json.dumps(dicts, indent=4)
     else:
         return False
     write_string(out, out=sys.stdout)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 92ddad2b76..85597fe8c6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -14,6 +14,7 @@ import netrc
 import os
 import random
 import re
+import string
 import subprocess
 import sys
 import time
@@ -610,6 +611,23 @@ class InfoExtractor:
         # so that lazy_extractors works correctly
         return cls._match_valid_url(url) is not None
 
+
+    @classmethod
+    def list_regex_url(cls):
+        return cls._VALID_URL if type(cls._VALID_URL) in [list, tuple] \
+            else (cls._VALID_URL.translate({ord(c): None for c in string.whitespace}),) if type(cls._VALID_URL) is str \
+            else []
+
+    @classmethod
+    def return_type(cls):
+        if '_RETURN_TYPE' not in cls.__dict__:
+            return ''
+        return cls._RETURN_TYPE
+
+    @classmethod
+    def is_enabled(cls):
+        return cls._ENABLED
+
     @classmethod
     def _match_id(cls, url):
         return cls._match_valid_url(url).group('id')
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 930d9d4bef..ba75beec4c 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -362,6 +362,10 @@ def create_parser():
         '--list-extractors',
         action='store_true', dest='list_extractors', default=False,
         help='List all supported extractors and exit')
+    general.add_option(
+        '--list-extractors-json',
+        action='store_true', dest='list_extractors_json', default=False,
+        help='List all supported extractors parameters in JSON format and exit')
     general.add_option(
         '--extractor-descriptions',
         action='store_true', dest='list_extractor_descriptions', default=False,

From 25b73a440bbe8f672aac4c337b851b1a2e6c2442 Mon Sep 17 00:00:00 2001
From: wesson <wesson@vsosoftware.fr>
Date: Wed, 4 Dec 2024 22:22:42 +0100
Subject: [PATCH 2/3] document --list-extractors-json option and fix a minor
 typo

---
 README.md          | 17 ++++++++++++++++-
 yt_dlp/__init__.py |  4 ++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 772395d24d..5808924316 100644
--- a/README.md
+++ b/README.md
@@ -302,6 +302,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                     error occurs (Alias: --no-ignore-errors)
     --dump-user-agent               Display the current user-agent and exit
     --list-extractors               List all supported extractors and exit
+    --list-extractors-json          List all supported extractors in JSON and exit
     --extractor-descriptions        Output descriptions of all supported
                                     extractors and exit
     --use-extractors NAMES          Extractor names to use separated by commas.
@@ -1875,7 +1876,21 @@ The following extractors use this feature:
 
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
 
-
+# EXTRACTOR INFO JSON
+The `--list-extractors-json` option outputs information about extractors, formatted as JSON. If one or more URLs are specified, only the extractors matching at least one URL are listed. If no URL is specified, all extractors are listed. The generic extractor is always the last in the list.
+
+### List of values returned
+key         | type            | description
+:------------|:----------------|:----------------------------
+index        | int             | index in list, starting from 0  
+name         | string          | name of the extractor
+desc         | string          | description of the extractor
+working      | bool            | true if the extractor is working
+enabled      | bool            | true if the extractor is enabled
+return_type  | string          | type of data returned by the extractor ("video", "playlist" or "any"); empty string if the extractor class does not declare a return type
+regex_urls   | array of string | list of regular expressions used by the extractor to match URLs
+matched_urls | array of string | list of URLs passed on the command line that matched the extractor. Present only if at least one URL is specified.
+ 
 # PLUGINS
 
 Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 31fa1d7b54..87f6faea39 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -140,7 +140,7 @@ def print_extractor_information(opts, urls):
                             'working': ie.working(),
                             'enabled': ie.is_enabled(),
                             'return_type': ie.return_type(),
-                            'regex_url': ie.list_regex_url(),
+                            'regex_urls': ie.list_regex_url(),
                             'matched_urls': matched_urls,
                     }
                     e_index += 1
@@ -154,7 +154,7 @@ def print_extractor_information(opts, urls):
                         'working': ie.working(),
                         'enabled': ie.is_enabled(),
                         'return_type': ie.return_type(),
-                        'regex_url': ie.list_regex_url(),
+                        'regex_urls': ie.list_regex_url(),
                         }
                 dicts.append(data)
                 e_index += 1

From ecab7b56f37a7d1b3837bcd238a48264177f4d10 Mon Sep 17 00:00:00 2001
From: wesson <wesson@vsosoftware.fr>
Date: Wed, 4 Dec 2024 22:41:22 +0100
Subject: [PATCH 3/3] make autopep8 happy

---
 yt_dlp/__init__.py         | 2 +-
 yt_dlp/extractor/common.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 87f6faea39..b741d429a0 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -142,7 +142,7 @@ def print_extractor_information(opts, urls):
                             'return_type': ie.return_type(),
                             'regex_urls': ie.list_regex_url(),
                             'matched_urls': matched_urls,
-                    }
+                            }
                     e_index += 1
                     dicts.append(data)
         else:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 85597fe8c6..ca99c75760 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -611,7 +611,6 @@ class InfoExtractor:
         # so that lazy_extractors works correctly
         return cls._match_valid_url(url) is not None
 
-
     @classmethod
     def list_regex_url(cls):
         return cls._VALID_URL if type(cls._VALID_URL) in [list, tuple] \