Merge remote-tracking branch 'upstream/master' into feat/youtube/sabr

# Conflicts:
#	.github/workflows/build.yml
commit 2b85ffed9d (pull/13515/head)
Author: coletdjnz

@@ -208,7 +208,7 @@ jobs:
           python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2'
           # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage
           python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \
-              'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' 'protobug==0.3.0'
+              'requests>=2.32.2,<3' 'urllib3>=2.0.2,<3' 'websockets>=13.0' 'protobug==0.3.0'
         run: |
           cd repo

@@ -171,6 +171,9 @@ yt-dlp --update-to nightly
 python3 -m pip install -U --pre "yt-dlp[default]"
 ```
 
+When running a yt-dlp version that is older than 90 days, you will see a warning message suggesting to update to the latest version.
+You can suppress this warning by adding `--no-update` to your command or configuration file.
+
 ## DEPENDENCIES
 
 Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.

@@ -20,6 +20,7 @@ def parse_patched_options(opts):
         'fragment_retries': 0,
         'extract_flat': False,
         'concat_playlist': 'never',
+        'update_self': False,
     })
     yt_dlp.options.create_parser = lambda: patched_parser
     try:

@@ -51,7 +51,7 @@ default = [
     "mutagen",
     "pycryptodomex",
     "requests>=2.32.2,<3",
-    "urllib3>=1.26.17,<3",
+    "urllib3>=2.0.2,<3",
     "websockets>=13.0",
     "protobug==0.3.0",
 ]

@ -73,6 +73,7 @@ from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import ( from .update import (
REPOSITORY, REPOSITORY,
_get_system_deprecation, _get_system_deprecation,
_get_outdated_warning,
_make_label, _make_label,
current_git_head, current_git_head,
detect_variant, detect_variant,
@ -504,6 +505,7 @@ class YoutubeDL:
force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
noprogress: Do not print the progress bar noprogress: Do not print the progress bar
live_from_start: Whether to download livestreams videos from the start live_from_start: Whether to download livestreams videos from the start
warn_when_outdated: Emit a warning if the yt-dlp version is older than 90 days
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py): the downloader (see yt_dlp/downloader/common.py):
@ -703,6 +705,9 @@ class YoutubeDL:
system_deprecation = _get_system_deprecation() system_deprecation = _get_system_deprecation()
if system_deprecation: if system_deprecation:
self.deprecated_feature(system_deprecation.replace('\n', '\n ')) self.deprecated_feature(system_deprecation.replace('\n', '\n '))
elif self.params.get('warn_when_outdated'):
if outdated_warning := _get_outdated_warning():
self.report_warning(outdated_warning)
if self.params.get('allow_unplayable_formats'): if self.params.get('allow_unplayable_formats'):
self.report_warning( self.report_warning(

@@ -971,6 +971,7 @@ def parse_options(argv=None):
         'geo_bypass': opts.geo_bypass,
         'geo_bypass_country': opts.geo_bypass_country,
         'geo_bypass_ip_block': opts.geo_bypass_ip_block,
+        'warn_when_outdated': opts.update_self is None,
         '_warnings': warnings,
         '_deprecation_warnings': deprecation_warnings,
         'compat_opts': opts.compat_opts,
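
Note: `opts.update_self` is `None` only when the user passed neither `--update`/`--update-to` nor `--no-update`, so the CLI enables the outdated-version warning exactly in that case. A minimal sketch of the same switch through the embedding API (only `warn_when_outdated` comes from this diff; the rest is ordinary yt-dlp usage):

```
import yt_dlp

# Mirrors what parse_options() sets when neither --update/--update-to
# nor --no-update was given (i.e. opts.update_self is None)
with yt_dlp.YoutubeDL({'warn_when_outdated': True}) as ydl:
    ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
```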

@ -3,7 +3,7 @@ import urllib.parse
from . import get_suitable_downloader from . import get_suitable_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from ..utils import update_url_query, urljoin from ..utils import ReExtractInfo, update_url_query, urljoin
class DashSegmentsFD(FragmentFD): class DashSegmentsFD(FragmentFD):
@ -28,6 +28,11 @@ class DashSegmentsFD(FragmentFD):
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
args = [] args = []
for fmt in requested_formats or [info_dict]: for fmt in requested_formats or [info_dict]:
# Re-extract if --load-info-json is used and 'fragments' was originally a generator
# See https://github.com/yt-dlp/yt-dlp/issues/13906
if isinstance(fmt['fragments'], str):
raise ReExtractInfo('the stream needs to be re-extracted', expected=True)
try: try:
fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) fragment_count = 1 if self.params.get('test') else len(fmt['fragments'])
except TypeError: except TypeError:
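
Note: when an info dict is written to JSON, a `fragments` generator cannot be serialized natively and ends up as its string representation, which is what the new `isinstance(..., str)` guard detects after `--load-info-json`. A rough, self-contained illustration of the failure mode (using `json.dumps(default=repr)` as a stand-in for yt-dlp's actual sanitizer):

```
import json

fragments = ({'url': f'https://example.invalid/seg{i}.m4s'} for i in range(3))
blob = json.dumps({'fragments': fragments}, default=repr)

# After the round trip, 'fragments' is a string, not a list of fragment dicts,
# so the downloader can only recover by re-extracting the stream
restored = json.loads(blob)
print(type(restored['fragments']))  # <class 'str'>
```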

@@ -1866,6 +1866,7 @@ from .shahid import (
 from .sharepoint import SharePointIE
 from .sharevideos import ShareVideosEmbedIE
 from .shemaroome import ShemarooMeIE
+from .shiey import ShieyIE
 from .showroomlive import ShowRoomLiveIE
 from .sibnet import SibnetEmbedIE
 from .simplecast import (

@@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
+    determine_ext,
     jwt_decode_hs256,
     parse_codecs,
     try_get,
@@ -222,11 +223,18 @@ class DigitalConcertHallIE(InfoExtractor):
             raise
 
         formats = []
-        for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
-            formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
-        for fmt in formats:
-            if fmt.get('format_note') and fmt.get('vcodec') == 'none':
-                fmt.update(parse_codecs(fmt['format_note']))
+        for fmt_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
+            ext = determine_ext(fmt_url)
+            if ext == 'm3u8':
+                fmts = self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                for fmt in fmts:
+                    if fmt.get('format_note') and fmt.get('vcodec') == 'none':
+                        fmt.update(parse_codecs(fmt['format_note']))
+                formats.extend(fmts)
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(fmt_url, video_id, mpd_id='dash', fatal=False))
+            else:
+                self.report_warning(f'Skipping unsupported format extension "{ext}"')
 
         yield {
             'id': video_id,
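
Note: the branching is driven by `yt_dlp.utils.determine_ext`, which guesses the extension from the URL path (query string ignored). A quick sketch of the values feeding the three branches, with made-up URLs:

```
from yt_dlp.utils import determine_ext

determine_ext('https://example.invalid/stream/master.m3u8?token=abc')  # 'm3u8' -> HLS branch
determine_ext('https://example.invalid/stream/manifest.mpd')           # 'mpd'  -> DASH branch
determine_ext('https://example.invalid/stream/video.bin')              # 'bin'  -> warning branch
```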

@@ -51,23 +51,7 @@ class MotherlessIE(InfoExtractor):
         'skip': '404',
     }, {
         'url': 'http://motherless.com/g/cosplay/633979F',
-        'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
-        'info_dict': {
-            'id': '633979F',
-            'ext': 'mp4',
-            'title': 'Turtlette',
-            'categories': ['superheroine heroine superher'],
-            'upload_date': '20140827',
-            'uploader_id': 'shade0230',
-            'thumbnail': r're:https?://.*\.jpg',
-            'age_limit': 18,
-            'like_count': int,
-            'comment_count': int,
-            'view_count': int,
-        },
-        'params': {
-            'nocheckcertificate': True,
-        },
+        'expected_exception': 'ExtractorError',
     }, {
         'url': 'http://motherless.com/8B4BBC1',
         'info_dict': {
@@ -113,8 +97,10 @@ class MotherlessIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         if any(p in webpage for p in (
                 '<title>404 - MOTHERLESS.COM<',
-                ">The page you're looking for cannot be found.<")):
+                ">The page you're looking for cannot be found.<",
+                '<div class="error-page',
+        )):
             raise ExtractorError(f'Video {video_id} does not exist', expected=True)
 
         if '>The content you are trying to view is for friends only.' in webpage:
@@ -183,6 +169,9 @@ class MotherlessPaginatedIE(InfoExtractor):
     def _correct_path(self, url, item_id):
         raise NotImplementedError('This method must be implemented by subclasses')
 
+    def _correct_title(self, title, /):
+        return title.partition(' - Videos')[0] if title else None
+
     def _extract_entries(self, webpage, base):
         for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)',
                                 webpage):
@@ -205,7 +194,7 @@ class MotherlessPaginatedIE:
         return self.playlist_result(
             OnDemandPagedList(get_page, self._PAGE_SIZE), item_id,
-            remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™'))
+            self._correct_title(self._html_extract_title(webpage)))
 
 
 class MotherlessGroupIE(MotherlessPaginatedIE):
@@ -214,7 +203,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
         'url': 'http://motherless.com/gv/movie_scenes',
         'info_dict': {
             'id': 'movie_scenes',
-            'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
+            'title': 'Movie Scenes',
         },
         'playlist_mincount': 540,
     }, {
@@ -230,7 +219,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
             'id': 'beautiful_cock',
             'title': 'Beautiful Cock',
         },
-        'playlist_mincount': 2040,
+        'playlist_mincount': 371,
     }]
 
     def _correct_path(self, url, item_id):
@@ -245,14 +234,14 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
             'id': '338999F',
             'title': 'Random',
         },
-        'playlist_mincount': 171,
+        'playlist_mincount': 100,
     }, {
         'url': 'https://motherless.com/GVABD6213',
         'info_dict': {
             'id': 'ABD6213',
             'title': 'Cuties',
         },
-        'playlist_mincount': 2,
+        'playlist_mincount': 1,
     }, {
         'url': 'https://motherless.com/GVBCF7622',
         'info_dict': {
@@ -266,9 +255,12 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
             'id': '035DE2F',
             'title': 'General',
         },
-        'playlist_mincount': 420,
+        'playlist_mincount': 234,
     }]
 
+    def _correct_title(self, title, /):
+        return remove_end(title, ' | MOTHERLESS.COM ™')
+
     def _correct_path(self, url, item_id):
         return urllib.parse.urljoin(url, f'/GV{item_id}')
 
@@ -279,14 +271,14 @@ class MotherlessUploaderIE(MotherlessPaginatedIE):
         'url': 'https://motherless.com/u/Mrgo4hrs2023',
         'info_dict': {
             'id': 'Mrgo4hrs2023',
-            'title': "Mrgo4hrs2023's Uploads - Videos",
+            'title': "Mrgo4hrs2023's Uploads",
         },
         'playlist_mincount': 32,
     }, {
         'url': 'https://motherless.com/u/Happy_couple?t=v',
         'info_dict': {
             'id': 'Happy_couple',
-            'title': "Happy_couple's Uploads - Videos",
+            'title': "Happy_couple's Uploads",
         },
         'playlist_mincount': 8,
     }]
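
Note: title cleanup is now an overridable hook. The base class strips everything from ' - Videos' onward (which also drops the SEO tail that used to leak into group titles), while galleries keep the old suffix removal. In plain Python:

```
from yt_dlp.utils import remove_end

# Base-class behaviour (groups, uploaders)
title = 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies...'
print(title.partition(' - Videos')[0])  # Movie Scenes

# Gallery override: only the site suffix is removed
print(remove_end('Cuties | MOTHERLESS.COM ™', ' | MOTHERLESS.COM ™'))  # Cuties
```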

@@ -0,0 +1,34 @@
+import json
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..utils import extract_attributes
+from ..utils.traversal import find_element, traverse_obj
+
+
+class ShieyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?shiey\.com/videos/v/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.shiey.com/videos/v/train-journey-to-edge-of-serbia-ep-2',
+        'info_dict': {
+            'id': '1103409448',
+            'ext': 'mp4',
+            'title': 'Train Journey To Edge of Serbia (Ep. 2)',
+            'uploader': 'shiey',
+            'uploader_url': '',
+            'duration': 1364,
+            'thumbnail': r're:^https?://.+',
+        },
+        'params': {'skip_download': True},
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        oembed_html = traverse_obj(webpage, (
+            {find_element(attr='data-controller', value='VideoEmbed', html=True)},
+            {extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str}))
+
+        return self.url_result(VimeoIE._extract_url(url, oembed_html), VimeoIE)
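
Note: the extractor never parses the Vimeo player itself; it digs the oEmbed HTML out of the page's `data-config-embed-video` attribute and hands off to `VimeoIE`. A hedged sketch of the traversal against made-up markup of the expected shape (the real page markup may differ):

```
import json

from yt_dlp.utils import extract_attributes
from yt_dlp.utils.traversal import find_element, traverse_obj

# Hypothetical markup shaped like what the extractor targets
webpage = ('<div data-controller="VideoEmbed" data-config-embed-video='
           '\'{"oembedHtml": "&lt;iframe src=...&gt;"}\'></div>')

oembed_html = traverse_obj(webpage, (
    {find_element(attr='data-controller', value='VideoEmbed', html=True)},
    {extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str}))
print(oembed_html)  # <iframe src=...>
```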

@@ -16,7 +16,7 @@ class YandexDiskIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://
         (?P<domain>
             yadi\.sk|
-            disk\.yandex\.
+            disk\.(?:360\.)?yandex\.
             (?:
                 az|
                 by|
@@ -51,6 +51,9 @@ class YandexDiskIE(InfoExtractor):
     }, {
         'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
         'only_matching': True,
+    }, {
+        'url': 'https://disk.360.yandex.ru/i/TM2xsIVsgjY4uw',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
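
Note: the pattern change simply makes the `360.` label optional, so links from the Yandex 360 flavour of Disk hit the same extractor:

```
import re

pattern = r'disk\.(?:360\.)?yandex\.'
for url in ('https://disk.yandex.ru/i/abcdef', 'https://disk.360.yandex.ru/i/TM2xsIVsgjY4uw'):
    print(bool(re.search(pattern, url)))  # True, True
```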

@@ -311,6 +311,7 @@ INNERTUBE_CLIENTS = {
                 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
             },
         },
+        'PLAYER_PARAMS': '8AEB',
         'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
         'GVS_PO_TOKEN_POLICY': {
             StreamingProtocol.HTTPS: GvsPoTokenPolicy(

@ -1,6 +1,5 @@
from __future__ import annotations from __future__ import annotations
import contextlib
import functools import functools
import http.client import http.client
import logging import logging
@ -20,9 +19,9 @@ if urllib3 is None:
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))
if urllib3_version < (1, 26, 17): if urllib3_version < (2, 0, 2):
urllib3._yt_dlp__version = f'{urllib3.__version__} (unsupported)' urllib3._yt_dlp__version = f'{urllib3.__version__} (unsupported)'
raise ImportError('Only urllib3 >= 1.26.17 is supported') raise ImportError('Only urllib3 >= 2.0.2 is supported')
if requests.__build__ < 0x023202: if requests.__build__ < 0x023202:
requests._yt_dlp__version = f'{requests.__version__} (unsupported)' requests._yt_dlp__version = f'{requests.__version__} (unsupported)'
@ -101,27 +100,10 @@ class Urllib3PercentREOverride:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0 # https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url import urllib3.util.url
if hasattr(urllib3.util.url, 'PERCENT_RE'): if hasattr(urllib3.util.url, '_PERCENT_RE'): # was 'PERCENT_RE' in urllib3 < 2.0.0
urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0
urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE) urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else: else:
warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) warnings.warn('Failed to patch _PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
'''
Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
however this is an issue because we set check_hostname to True in our SSLContext.
Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless.
This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
'''
if urllib3_version < (2, 0, 0):
with contextlib.suppress(Exception):
urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
# Requests will not automatically handle no_proxy by default # Requests will not automatically handle no_proxy by default
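
Note: raising the floor to urllib3 >= 2.0.2 is what makes both deletions safe: `PERCENT_RE` (no underscore) and the `IS_SECURETRANSPORT` hostname workaround only ever applied to urllib3 1.x, which can no longer get past the import-time gate. The gate itself is a plain tuple comparison over the parsed version:

```
from yt_dlp.utils import int_or_none

def parse(v):
    # Same parsing as this module; non-numeric components collapse to 0
    return tuple(int_or_none(x, default=0) for x in v.split('.'))

print(parse('1.26.17') < (2, 0, 2))  # True  -> ImportError at import time
print(parse('2.0.2') < (2, 0, 2))    # False -> handler stays available
```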

@@ -54,6 +54,9 @@ class XAttrMetadataPP(PostProcessor):
             if infoname == 'upload_date':
                 value = hyphenate_date(value)
             elif xattrname == 'com.apple.metadata:kMDItemWhereFroms':
+                # NTFS ADS doesn't support colons in names
+                if os.name == 'nt':
+                    continue
                 value = self.APPLE_PLIST_TEMPLATE % value
 
             write_xattr(info['filepath'], xattrname, value.encode())
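
Note: on Windows, `write_xattr` emulates extended attributes with NTFS Alternate Data Streams (an ADS path looks like `f'{path}:{name}'`), so an attribute name that itself contains `:`, as Apple's `com.apple.metadata:kMDItemWhereFroms` does, cannot be represented and is now skipped up front. Conceptually:

```
import os

xattrname = 'com.apple.metadata:kMDItemWhereFroms'
if os.name == 'nt' and ':' in xattrname:
    # 'video.mp4:com.apple.metadata:kMDItemWhereFroms' is not a valid ADS path;
    # the second ':' would split the stream name
    print(f'skipping {xattrname!r}')
```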

@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import atexit
 import contextlib
+import datetime as dt
 import functools
 import hashlib
 import json
@@ -171,6 +172,22 @@ def _get_system_deprecation():
     return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'
 
 
+def _get_outdated_warning():
+    # Only yt-dlp guarantees a stable release at least every 90 days
+    if not ORIGIN.startswith('yt-dlp/'):
+        return None
+    with contextlib.suppress(Exception):
+        last_updated = dt.date(*version_tuple(__version__)[:3])
+        if last_updated < dt.datetime.now(dt.timezone.utc).date() - dt.timedelta(days=90):
+            return ('\n         '.join((
+                f'Your yt-dlp version ({__version__}) is older than 90 days!',
+                'It is strongly recommended to always use the latest version.',
+                f'{is_non_updateable() or """Run "yt-dlp --update" or "yt-dlp -U" to update"""}.',
+                'To suppress this warning, add --no-update to your command/config.')))
+    return None
+
+
 def _sha256_file(path):
     h = hashlib.sha256()
     mv = memoryview(bytearray(128 * 1024))
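
Note: the 90-day math works because yt-dlp stable versions are calendar-based (`YYYY.MM.DD`), so the first three components of `version_tuple()` reconstruct the release date, which is then compared against today's UTC date:

```
import datetime as dt

from yt_dlp.utils import version_tuple

version = '2025.03.31'  # example stable-release version string
last_updated = dt.date(*version_tuple(version)[:3])  # dt.date(2025, 3, 31)
cutoff = dt.datetime.now(dt.timezone.utc).date() - dt.timedelta(days=90)
print(last_updated < cutoff)  # True once the release is over 90 days old
```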
