From bdeb3eb3f29eebbe8237fbc5186e51e7293eea4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 12 Aug 2025 02:58:22 -0500 Subject: [PATCH 01/11] [pp/XAttrMetadata] Only set "Where From" attribute on macOS (#13999) Fix 3e918d825d7ff367812658957b281b8cda8f9ebb Closes #14004 Authored by: bashonly --- yt_dlp/postprocessor/xattrpp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index f879989761..52404b7d7a 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,4 +1,5 @@ import os +import sys from .common import PostProcessor from ..utils import ( @@ -54,8 +55,8 @@ class XAttrMetadataPP(PostProcessor): if infoname == 'upload_date': value = hyphenate_date(value) elif xattrname == 'com.apple.metadata:kMDItemWhereFroms': - # NTFS ADS doesn't support colons in names - if os.name == 'nt': + # Colon in xattr name throws errors on Windows/NTFS and Linux + if sys.platform != 'darwin': continue value = self.APPLE_PLIST_TEMPLATE % value write_xattr(info['filepath'], xattrname, value.encode()) From 681ed2153de754c2c885fdad09ab71fffa8114f9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 12 Aug 2025 18:17:13 -0500 Subject: [PATCH 02/11] [build] Bump PyInstaller version to 6.15.0 for Windows (#14002) Authored by: bashonly --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 810490f735..6679eb0221 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -410,7 +410,7 @@ jobs: run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build python devscripts/install_deps.py --include curl-cffi - python -m pip install -U 
"https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.13.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x64/pyinstaller-6.15.0-py3-none-any.whl" - name: Prepare run: | @@ -459,7 +459,7 @@ jobs: run: | python devscripts/install_deps.py -o --include build python devscripts/install_deps.py - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.13.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86/pyinstaller-6.15.0-py3-none-any.whl" - name: Prepare run: | From f2919bd28eac905f1267c62b83738a02bb5b4e04 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 12 Aug 2025 18:24:31 -0500 Subject: [PATCH 03/11] [ie/youtube] Add `es5` and `es6` player JS variants (#14005) Authored by: bashonly --- test/test_youtube_signature.py | 18 ++++++++++++++++++ yt_dlp/extractor/youtube/_video.py | 2 ++ 2 files changed, 20 insertions(+) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4562467534..684a6175db 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -138,6 +138,16 @@ _SIG_TESTS = [ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', 'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a', ), + ( + 'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js', + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', + 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-', + ), + ( + 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js', + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', + 
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-', + ), ] _NSIG_TESTS = [ @@ -377,6 +387,14 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js', 'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA', ), + ( + 'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js', + '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg', + ), + ( + 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js', + '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg', + ), ] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 9ff727657c..14582c5f98 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -1817,6 +1817,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): _PLAYER_JS_VARIANT_MAP = { 'main': 'player_ias.vflset/en_US/base.js', 'tce': 'player_ias_tce.vflset/en_US/base.js', + 'es5': 'player_es5.vflset/en_US/base.js', + 'es6': 'player_es6.vflset/en_US/base.js', 'tv': 'tv-player-ias.vflset/tv-player-ias.js', 'tv_es6': 'tv-player-es6.vflset/tv-player-es6.js', 'phone': 'player-plasma-ias-phone-en_US.vflset/base.js', From aea85d525e1007bb64baec0e170c054292d0858a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:02:58 -0500 Subject: [PATCH 04/11] [build] Discontinue `darwin_legacy_exe` support (#13860) * Removes "yt-dlp_macos_legacy" from release assets * Discontinues executable support for macOS < 10.15 Closes #13856 Authored by: bashonly --- .github/workflows/build.yml | 72 +++++-------------------------------- README.md | 1 - test/test_update.py | 16 ++++++--- yt_dlp/update.py | 43 +++++++++++----------- 4 files changed, 40 insertions(+), 92 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6679eb0221..ec5d4020ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,9 +21,6 @@ on: macos: default: 
true type: boolean - macos_legacy: - default: true - type: boolean windows: default: true type: boolean @@ -67,10 +64,6 @@ on: description: yt-dlp_macos, yt-dlp_macos.zip default: true type: boolean - macos_legacy: - description: yt-dlp_macos_legacy - default: true - type: boolean windows: description: yt-dlp.exe, yt-dlp_win.zip default: true @@ -344,58 +337,6 @@ jobs: ~/yt-dlp-build-venv key: cache-reqs-${{ github.job }}-${{ github.ref }} - macos_legacy: - needs: process - if: inputs.macos_legacy - runs-on: macos-13 - - steps: - - uses: actions/checkout@v4 - - name: Install Python - # We need the official Python, because the GA ones only support newer macOS versions - env: - PYTHON_VERSION: 3.10.5 - MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools - run: | - # Hack to get the latest patch version. Uncomment if needed - #brew install python@3.10 - #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) - curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg" - sudo installer -pkg python.pkg -target / - python3 --version - - name: Install Requirements - run: | - brew install coreutils - python3 devscripts/install_deps.py --user -o --include build - python3 devscripts/install_deps.py --user --include pyinstaller - - - name: Prepare - run: | - python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" - python3 devscripts/make_lazy_extractors.py - - name: Build - run: | - python3 -m bundle.pyinstaller - mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy - - - name: Verify --update-to - if: vars.UPDATE_TO_VERIFICATION - run: | - chmod +x ./dist/yt-dlp_macos_legacy - cp ./dist/yt-dlp_macos_legacy ./dist/yt-dlp_macos_legacy_downgraded - version="$(./dist/yt-dlp_macos_legacy --version)" - ./dist/yt-dlp_macos_legacy_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04 - 
downgraded_version="$(./dist/yt-dlp_macos_legacy_downgraded --version)" - [[ "$version" != "$downgraded_version" ]] - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: build-bin-${{ github.job }} - path: | - dist/yt-dlp_macos_legacy - compression-level: 0 - windows: needs: process if: inputs.windows @@ -498,7 +439,6 @@ jobs: - linux_static - linux_arm - macos - - macos_legacy - windows - windows32 runs-on: ubuntu-latest @@ -530,27 +470,31 @@ jobs: lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b - lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lock 2024.10.22 zip Python 3\.8 lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) + lock 2025.08.11 darwin_legacy_exe .+ lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b - lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp 2024.10.22 zip Python 3\.8 lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) + lockV2 yt-dlp/yt-dlp 2025.08.11 darwin_legacy_exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b - lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 zip Python 3\.8 lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe 
Python 3\.[78].+ Windows-(?:7-|2008ServerR2) + lockV2 yt-dlp/yt-dlp-nightly-builds 2025.08.12.233030 darwin_legacy_exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b - lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 zip Python 3\.8 lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) + lockV2 yt-dlp/yt-dlp-master-builds 2025.08.12.232447 darwin_legacy_exe .+ EOF - name: Sign checksum files diff --git a/README.md b/README.md index 9b28147f2c..aa8b1d4f24 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,6 @@ File|Description [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) -[yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable #### Misc diff --git a/test/test_update.py b/test/test_update.py index 23c12d38c1..b4979bc92c 100644 --- a/test/test_update.py +++ b/test/test_update.py @@ -84,8 +84,9 @@ lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b -lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 +lock 2024.10.22 
zip Python 3\.8 lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) +lock 2025.08.11 darwin_legacy_exe .+ ''' TEST_LOCKFILE_V2_TMPL = r'''%s @@ -94,20 +95,23 @@ lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b -lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp 2024.10.22 zip Python 3\.8 lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) +lockV2 yt-dlp/yt-dlp 2025.08.11 darwin_legacy_exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b -lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 zip Python 3\.8 lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) +lockV2 yt-dlp/yt-dlp-nightly-builds 2025.08.12.233030 darwin_legacy_exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b -lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 zip Python 3\.8 lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 
3\.[78].+ Windows-(?:7-|2008ServerR2) +lockV2 yt-dlp/yt-dlp-master-builds 2025.08.12.232447 darwin_legacy_exe .+ ''' TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT @@ -217,6 +221,10 @@ class TestUpdate(unittest.TestCase): test( # linux_aarch64_exe w/glibc2.3 should only update to glibc<2.31 lock lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.3 (OpenSSL', '2025.01.01', '2024.10.22') + test(lockfile, 'darwin_legacy_exe Python 3.10.5', '2025.08.11', '2025.08.11') + test(lockfile, 'darwin_legacy_exe Python 3.10.5', '2025.08.11', '2025.08.11', exact=True) + test(lockfile, 'darwin_legacy_exe Python 3.10.5', '2025.08.12', '2025.08.11') + test(lockfile, 'darwin_legacy_exe Python 3.10.5', '2025.08.12', None, exact=True) # Forks can block updates to non-numeric tags rather than lock test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp') diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ca69fbbada..dd948cd521 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -58,26 +58,30 @@ def _get_variant_and_executable_path(): """@returns (variant, executable_path)""" if getattr(sys, 'frozen', False): path = sys.executable + # py2exe is unsupported but we should still correctly identify it for debugging purposes if not hasattr(sys, '_MEIPASS'): return 'py2exe', path - elif sys._MEIPASS == os.path.dirname(path): + if sys._MEIPASS == os.path.dirname(path): return f'{sys.platform}_dir', path - elif sys.platform == 'darwin': + if sys.platform == 'darwin': + # darwin_legacy_exe is no longer supported, but still identify it to block updates machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else '' - else: - machine = f'_{platform.machine().lower()}' - is_64bits = sys.maxsize > 2**32 - # Ref: https://en.wikipedia.org/wiki/Uname#Examples - if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): - machine = '_x86' if not is_64bits else '' - # 
platform.machine() on 32-bit raspbian OS may return 'aarch64', so check "64-bitness" - # See: https://github.com/yt-dlp/yt-dlp/issues/11813 - elif machine[1:] == 'aarch64' and not is_64bits: - machine = '_armv7l' - # sys.executable returns a /tmp/ path for staticx builds (linux_static) - # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information - if static_exe_path := os.getenv('STATICX_PROG_PATH'): - path = static_exe_path + return f'darwin{machine}_exe', path + + machine = f'_{platform.machine().lower()}' + is_64bits = sys.maxsize > 2**32 + # Ref: https://en.wikipedia.org/wiki/Uname#Examples + if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): + machine = '_x86' if not is_64bits else '' + # platform.machine() on 32-bit raspbian OS may return 'aarch64', so check "64-bitness" + # See: https://github.com/yt-dlp/yt-dlp/issues/11813 + elif machine[1:] == 'aarch64' and not is_64bits: + machine = '_armv7l' + # sys.executable returns a /tmp/ path for staticx builds (linux_static) + # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information + if static_exe_path := os.getenv('STATICX_PROG_PATH'): + path = static_exe_path + return f'{remove_end(sys.platform, "32")}{machine}_exe', path path = os.path.dirname(__file__) @@ -111,7 +115,6 @@ _FILE_SUFFIXES = { 'win_exe': '.exe', 'win_x86_exe': '_x86.exe', 'darwin_exe': '_macos', - 'darwin_legacy_exe': '_macos_legacy', 'linux_exe': '_linux', 'linux_aarch64_exe': '_linux_aarch64', 'linux_armv7l_exe': '_linux_armv7l', @@ -147,12 +150,6 @@ def _get_system_deprecation(): STOP_MSG = 'You may stop receiving updates on this version at any time!' 
variant = detect_variant() - # Temporary until macos_legacy executable builds are discontinued - if variant == 'darwin_legacy_exe': - return EXE_MSG_TMPL.format( - f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)', - 'issues/13856', STOP_MSG) - # Temporary until linux_armv7l executable builds are discontinued if variant == 'linux_armv7l_exe': return EXE_MSG_TMPL.format( From 8e3f8065af1415caeff788c5c430703dd0d8f576 Mon Sep 17 00:00:00 2001 From: "Arseniy D." <110495618+AzartX47@users.noreply.github.com> Date: Fri, 15 Aug 2025 17:07:35 -1000 Subject: [PATCH 05/11] [ie/weibo] Fix extractors (#14012) Closes #14012 Authored by: AzartX47, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/weibo.py | 43 +++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index 420ac38299..d5210850cc 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -52,13 +52,16 @@ class WeiboBaseIE(InfoExtractor): '_rand': random.random(), }) - def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): - # XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error - webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) + def _weibo_download_json(self, url, video_id, note='Downloading JSON metadata', data=None, headers=None, query=None): + headers = { + 'Referer': 'https://weibo.com/', + **(headers or {}), + } + webpage, urlh = self._download_webpage_handle(url, video_id, note=note, data=data, headers=headers, query=query) if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': self._update_visitor_cookies(urlh.url, video_id) - webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs) - return self._parse_json(webpage, video_id, 
fatal=fatal) + webpage = self._download_webpage(url, video_id, note=note, data=data, headers=headers, query=query) + return self._parse_json(webpage, video_id) def _extract_formats(self, video_info): media_info = traverse_obj(video_info, ('page_info', 'media_info')) @@ -189,7 +192,8 @@ class WeiboIE(WeiboBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - meta = self._weibo_download_json(f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id) + meta = self._weibo_download_json( + 'https://weibo.com/ajax/statuses/show', video_id, query={'id': video_id}) mix_media_info = traverse_obj(meta, ('mix_media_info', 'items', ...)) if not mix_media_info: return self._parse_video_info(meta) @@ -205,7 +209,7 @@ class WeiboIE(WeiboBaseIE): class WeiboVideoIE(WeiboBaseIE): - _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)' + _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:(?:[\da-f]{32}|\d{16,}))' _TESTS = [{ 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow', 'info_dict': { @@ -227,6 +231,27 @@ class WeiboVideoIE(WeiboBaseIE): 'repost_count': int, '_old_archive_ids': ['weibomobile 4797700463137878'], }, + }, { + 'url': 'https://weibo.com/tv/show/1034:633c288cc043d0ca7808030f1157da64', + 'info_dict': { + 'id': '4189191225395228', + 'ext': 'mp4', + 'display_id': 'FBqgOmDxO', + 'title': '柴犬柴犬的秒拍视频', + 'alt_title': '柴犬柴犬的秒拍视频', + 'description': '午睡当然是要甜甜蜜蜜的啦![坏笑] Instagram:shibainu.gaku http://t.cn/RHbmjzW \u200B\u200B\u200B', + 'uploader': '柴犬柴犬', + 'uploader_id': '5926682210', + 'uploader_url': 'https://weibo.com/u/5926682210', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'duration': 53, + 'thumbnail': 'https://wx1.sinaimg.cn/large/006t5KMygy1fmu31fsqbej30hs0hstav.jpg', + 'timestamp': 1514264429, + 'upload_date': '20171226', + '_old_archive_ids': ['weibomobile 4189191225395228'], + }, }] def _real_extract(self, url): @@ -234,8 +259,8 @@ class WeiboVideoIE(WeiboBaseIE): 
post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode() video_info = self._weibo_download_json( - f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}', - video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo'] + 'https://weibo.com/tv/api/component', video_id, data=post_data, headers={'Referer': url}, + query={'page': f'/tv/show/{video_id}'})['data']['Component_Play_Playinfo'] return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE) From 770119bdd15c525ba4338503f0eb68ea4baedf10 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sat, 16 Aug 2025 12:32:21 +0900 Subject: [PATCH 06/11] [ie] Extract avif storyboard formats from MPD manifests (#14016) Authored by: doe1080 --- yt_dlp/extractor/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a2970d0774..e003f6be74 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2968,7 +2968,7 @@ class InfoExtractor: else: codecs = parse_codecs(codec_str) if content_type not in ('video', 'audio', 'text'): - if mime_type == 'image/jpeg': + if mime_type in ('image/avif', 'image/jpeg'): content_type = mime_type elif codecs.get('vcodec', 'none') != 'none': content_type = 'video' @@ -3028,14 +3028,14 @@ class InfoExtractor: 'manifest_url': mpd_url, 'filesize': filesize, } - elif content_type == 'image/jpeg': + elif content_type in ('image/avif', 'image/jpeg'): # See test case in VikiIE # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1 f = { 'format_id': format_id, 'ext': 'mhtml', 'manifest_url': mpd_url, - 'format_note': 'DASH storyboards (jpeg)', + 'format_note': f'DASH storyboards ({mimetype2ext(mime_type)})', 'acodec': 'none', 'vcodec': 'none', } @@ -3177,7 +3177,7 @@ class InfoExtractor: 'url': mpd_url or base_url, 'fragment_base_url': 
base_url, 'fragments': [], - 'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml', + 'protocol': 'mhtml' if mime_type in ('image/avif', 'image/jpeg') else 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: initialization_url = representation_ms_info['initialization_url'] @@ -3192,7 +3192,7 @@ class InfoExtractor: else: # Assuming direct URL to unfragmented media. f['url'] = base_url - if content_type in ('video', 'audio', 'image/jpeg'): + if content_type in ('video', 'audio', 'image/avif', 'image/jpeg'): f['manifest_stream_number'] = stream_numbers[f['url']] stream_numbers[f['url']] += 1 period_entry['formats'].append(f) From 6ae3543d5a1feea0c546571fd2782b024c108eac Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sat, 16 Aug 2025 13:28:58 +0900 Subject: [PATCH 07/11] [ie] `_rta_search`: Do not assume `age_limit` is `0` (#13985) Authored by: doe1080 --- yt_dlp/extractor/common.py | 4 ++-- yt_dlp/extractor/generic.py | 18 ++---------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e003f6be74..a96fb9c4cb 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1527,11 +1527,11 @@ class InfoExtractor: r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b', ] - age_limit = 0 + age_limit = None for marker in AGE_LIMIT_MARKERS: mobj = re.search(marker, html) if mobj: - age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18))) + age_limit = max(age_limit or 0, int(traverse_obj(mobj, 1, default=18))) return age_limit def _media_rating_search(self, html): diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index bc34aafa7a..b3a27f31e8 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -121,7 +121,6 @@ class GenericIE(InfoExtractor): 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', 'ext': 
'mp4', 'title': 'čauky lidi 70 finall', - 'age_limit': 0, 'description': 'md5:47b2673a5b76780d9d329783e1fbf5aa', 'direct': True, 'duration': 318.0, @@ -244,7 +243,6 @@ class GenericIE(InfoExtractor): 'id': 'paris-d-moll', 'ext': 'mp4', 'title': 'Paris d-moll', - 'age_limit': 0, 'description': 'md5:319e37ea5542293db37e1e13072fe330', 'thumbnail': r're:https?://www\.filmarkivet\.se/wp-content/uploads/.+\.jpg', }, @@ -255,7 +253,6 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '60413035', 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans', - 'age_limit': 0, 'description': 'md5:bbb4e12e42e78609a74fd421b93b1239', 'thumbnail': r're:https?://www\.dagbladet\.no/images/.+', }, @@ -267,7 +264,6 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'single_clip', 'title': 'Single Clip player examples', - 'age_limit': 0, }, 'playlist_count': 3, }, { @@ -324,7 +320,6 @@ class GenericIE(InfoExtractor): 'id': 'videos-1', 'ext': 'mp4', 'title': 'Videos & Audio - King Machine (1)', - 'age_limit': 0, 'description': 'Browse King Machine videos & audio for sweet media. 
Your eyes will thank you.', 'thumbnail': r're:https?://media\.indiedb\.com/cache/images/.+\.jpg', '_old_archive_ids': ['generic videos'], @@ -363,7 +358,6 @@ class GenericIE(InfoExtractor): 'id': '21217', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', - 'age_limit': 0, 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'display_id': '40-nochey-2016', 'thumbnail': r're:https?://bogmedia\.org/contents/videos_screenshots/.+\.jpg', @@ -378,7 +372,6 @@ class GenericIE(InfoExtractor): 'id': '18485', 'ext': 'mp4', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', - 'age_limit': 0, 'display_id': 'leningrad-zoj', 'thumbnail': r're:https?://youix\.com/contents/videos_screenshots/.+\.jpg', }, @@ -419,7 +412,6 @@ class GenericIE(InfoExtractor): 'id': '105', 'ext': 'mp4', 'title': 'Kelis - 4th Of July / Embed Player', - 'age_limit': 0, 'display_id': 'kelis-4th-of-july', 'thumbnail': r're:https?://www\.kvs-demo\.com/contents/videos_screenshots/.+\.jpg', }, @@ -430,9 +422,8 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'beltzlaw-1', 'ext': 'mp4', - 'title': 'Beltz Law Group | Dallas Traffic Ticket, Accident & Criminal Attorney (1)', - 'age_limit': 0, - 'description': 'md5:5bdf23fcb76801dc3b31e74cabf82147', + 'title': str, + 'description': str, 'thumbnail': r're:https?://beltzlaw\.com/wp-content/uploads/.+\.jpg', 'timestamp': int, # varies 'upload_date': str, @@ -447,7 +438,6 @@ class GenericIE(InfoExtractor): 'id': 'cine-1', 'ext': 'webm', 'title': 'CINE.AR (1)', - 'age_limit': 0, 'description': 'md5:a4e58f9e2291c940e485f34251898c4a', 'thumbnail': r're:https?://cine\.ar/img/.+\.png', '_old_archive_ids': ['generic cine'], @@ -461,7 +451,6 @@ class GenericIE(InfoExtractor): 'id': 'ipy2AcGL', 'ext': 'mp4', 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen', - 'age_limit': 0, 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d', 'duration': 111.0, 'thumbnail': r're:https?://images\.nu\.nl/.+\.jpg', @@ -477,7 
+466,6 @@ class GenericIE(InfoExtractor): 'id': 'porsche-911-gt3-rs-rij-impressie-2', 'ext': 'mp4', 'title': 'Test: Porsche 911 GT3 RS - AutoWeek', - 'age_limit': 0, 'description': 'md5:a17b5bd84288448d8f11b838505718fc', 'direct': True, 'thumbnail': r're:https?://images\.autoweek\.nl/.+', @@ -493,7 +481,6 @@ class GenericIE(InfoExtractor): 'id': 'k6gl2kt2eq', 'ext': 'mp4', 'title': 'Breezy HR\'s ATS helps you find & hire employees sooner', - 'age_limit': 0, 'average_rating': 4.5, 'description': 'md5:eee75fdd3044c538003f3be327ba01e1', 'duration': 60.1, @@ -509,7 +496,6 @@ class GenericIE(InfoExtractor): 'id': 'videojs_hls_test', 'ext': 'mp4', 'title': 'video', - 'age_limit': 0, 'duration': 1800, }, 'params': {'skip_download': 'm3u8'}, From 70f56699515e0854a4853d214dce11b61d432387 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 17 Aug 2025 00:35:46 +0200 Subject: [PATCH 08/11] Warn against using `-f mp4` (#13915) Authored by: seproDev --- yt_dlp/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index bc9384b085..3277cbfa1a 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -500,6 +500,14 @@ def validate_options(opts): 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + # Common mistake: -f mp4 + if opts.format == 'mp4': + warnings.append('.\n '.join(( + '"-f mp4" selects the best pre-merged mp4 format which is often not what\'s intended', + 'Pre-merged mp4 formats are not available from all sites, or may only be available in lower quality', + 'To prioritize the best h264 video and aac audio in an mp4 container, use "-t mp4" instead', + 'If you know what you are doing and want a pre-merged mp4 format, use "-f b[ext=mp4]" instead to suppress this warning'))) + # --(postprocessor/downloader)-args without name def 
report_args_compat(name, value, key1, key2=None, where=None): if key1 in value and key2 not in value: From 8a8861d53864c8a38e924bc0657ead5180f17268 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Aug 2025 17:55:21 -0500 Subject: [PATCH 09/11] [ie/youtube:tab] Fix playlists tab extraction (#14030) Closes #14028 Authored by: bashonly --- yt_dlp/extractor/youtube/_tab.py | 55 ++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/youtube/_tab.py b/yt_dlp/extractor/youtube/_tab.py index 226e5ede3b..5870786978 100644 --- a/yt_dlp/extractor/youtube/_tab.py +++ b/yt_dlp/extractor/youtube/_tab.py @@ -566,6 +566,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'gridContinuation': (self._grid_entries, None), 'itemSectionContinuation': (self._post_thread_continuation_entries, None), 'sectionListContinuation': (extract_entries, None), # for feeds + 'lockupViewModel': (self._grid_entries, 'items'), # for playlists tab } continuation_items = traverse_obj(response, ( @@ -1026,7 +1027,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'title': 'Igor Kleiner - Playlists', - 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', + 'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$', 'uploader': 'Igor Kleiner ', 'uploader_id': '@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience', @@ -1043,7 +1044,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'title': 'Igor Kleiner - Playlists', - 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', + 'description': r're:(?s)Добро пожаловать на мой канал! 
Здесь вы найдете видео .{504}/a1/50b/10a$', 'uploader': 'Igor Kleiner ', 'uploader_id': '@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience', @@ -1093,7 +1094,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'only_matching': True, }, { - # TODO: fix availability extraction + # TODO: fix availability and view_count extraction 'note': 'basic, single video playlist', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU', 'info_dict': { @@ -1215,6 +1216,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'lex will', }, 'playlist_mincount': 18, + 'skip': 'This Community isn\'t available', }, { # TODO: fix channel_is_verified extraction 'note': 'Search tab', @@ -1399,7 +1401,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'info_dict': { - 'id': 'YDvsBbKfLPA', # This will keep changing + 'id': 'VFGoUmo74wE', # This will keep changing 'ext': 'mp4', 'title': str, 'upload_date': r're:\d{8}', @@ -1578,6 +1580,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_count': 50, 'expected_warnings': ['YouTube Music is not directly supported'], }, { + # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test 'note': 'unlisted single video playlist', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_', 'info_dict': { @@ -1597,19 +1600,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, 'playlist': [{ 'info_dict': { - 'title': 'youtube-dl test video "\'/\\ä↭𝕐', - 'id': 'BaW_jenozKc', + 'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film', + 'id': 'aqz-KE-bpKQ', '_type': 'url', 'ie_key': 'Youtube', - 'duration': 10, - 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', - 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', + 'duration': 635, + 'channel_id': 
'UCSMOQeBJ2RAnuFungnQOxLg', + 'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg', 'view_count': int, - 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc', - 'channel': 'Philipp Hagemeister', - 'uploader_id': '@PhilippHagemeister', - 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', - 'uploader': 'Philipp Hagemeister', + 'url': 'https://www.youtube.com/watch?v=aqz-KE-bpKQ', + 'channel': 'Blender', + 'uploader_id': '@BlenderOfficial', + 'uploader_url': 'https://www.youtube.com/@BlenderOfficial', + 'uploader': 'Blender', }, }], 'playlist_count': 1, @@ -1675,7 +1678,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'only_matching': True, }, { - # TODO: fix metadata extraction 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'info_dict': { @@ -1694,6 +1696,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'pukkandan', }, 'playlist_mincount': 2, + 'skip': 'https://github.com/yt-dlp/yt-dlp/issues/13690', }, { 'note': 'translated tab name', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists', @@ -1801,7 +1804,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'Not Just Bikes - Shorts', 'tags': 'count:10', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', - 'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031', + 'description': 'md5:295758591d0d43d8594277be54584da7', 'channel_follower_count': int, 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel': 'Not Just Bikes', @@ -1822,7 +1825,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig', 'channel': '中村悠一', 'channel_follower_count': int, - 'description': 'md5:e8fd705073a594f27d6d6d020da560dc', + 'description': 'md5:76b312b48a26c3b0e4d90e2dfc1b417d', 
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura', 'uploader_id': '@Yuichi-Nakamura', 'uploader': '中村悠一', @@ -1865,12 +1868,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], }, 'playlist_mincount': 30, + 'skip': 'The channel/playlist does not exist and the URL redirected to youtube.com home page', }, { # Trending Gaming Tab. tab id is empty 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D', 'info_dict': { 'id': 'trending', - 'title': 'trending - Gaming', + 'title': 'trending', 'tags': [], }, 'playlist_mincount': 30, @@ -2018,7 +2022,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'A Himitsu', 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A', 'tags': 'count:12', - 'description': 'I make music', + 'description': 'Music producer, sometimes.', 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A', 'channel_follower_count': int, 'channel_is_verified': True, @@ -2304,19 +2308,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): ) IE_NAME = 'youtube:playlist' _TESTS = [{ + # TODO: fix availability extraction 'note': 'issue #673', 'url': 'PLBB231211A4F62143', 'info_dict': { - 'title': '[OLD]Team Fortress 2 (Class-based LP)', + 'title': 'Team Fortress 2 [2010 Version]', 'id': 'PLBB231211A4F62143', - 'uploader': 'Wickman', - 'uploader_id': '@WickmanVT', + 'uploader': 'Wickman Wish', + 'uploader_id': '@WickmanWish', 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2', 'view_count': int, - 'uploader_url': 'https://www.youtube.com/@WickmanVT', + 'uploader_url': 'https://www.youtube.com/@WickmanWish', 'modified_date': r're:\d{8}', 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', - 'channel': 'Wickman', + 'channel': 'Wickman Wish', 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q', 'availability': 'public', @@ -2331,6 +2336,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'playlist_count': 2, 'skip': 'This playlist is private', }, { + 
# TODO: fix availability extraction 'note': 'embedded', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'playlist_count': 4, @@ -2351,6 +2357,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): }, 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'], }, { + # TODO: fix availability extraction 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 455, 'info_dict': { From edf55e81842fcfa6c302528d7f33ccd5081b37ef Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Aug 2025 17:57:14 -0500 Subject: [PATCH 10/11] [ie/tiktok:user] Avoid infinite loop during extraction (#14032) Closes #14031 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 87 ++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index d9280cec14..18407a0820 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -65,7 +65,7 @@ class TikTokBaseIE(InfoExtractor): @functools.cached_property def _DEVICE_ID(self): - return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000)) + return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7325099899999994577)) @functools.cached_property def _API_HOSTNAME(self): @@ -942,7 +942,6 @@ class TikTokUserIE(TikTokBaseIE): 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', }, }] - _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0' _API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/' def _build_web_query(self, sec_uid, cursor): @@ -986,9 +985,23 @@ class TikTokUserIE(TikTokBaseIE): cursor = int(time.time() * 1E3) for page in itertools.count(1): - response = self._download_json( - self._API_BASE_URL, display_id, 
f'Downloading page {page}', - query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT}) + for retry in self.RetryManager(): + response = self._download_json( + self._API_BASE_URL, display_id, f'Downloading page {page}', + query=self._build_web_query(sec_uid, cursor)) + + # Avoid infinite loop caused by bad device_id + # See: https://github.com/yt-dlp/yt-dlp/issues/14031 + current_batch = sorted(traverse_obj(response, ('itemList', ..., 'id', {str}))) + if current_batch and current_batch == sorted(seen_ids): + message = 'TikTok API keeps sending the same page' + if self._KNOWN_DEVICE_ID: + raise ExtractorError( + f'{message}. Try again with a different device_id', expected=True) + # The user didn't pass a device_id so we can reset it and retry + del self._DEVICE_ID + retry.error = ExtractorError( + f'{message}. Taking measures to avoid an infinite loop', expected=True) for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): video_id = video['id'] @@ -1008,42 +1021,52 @@ class TikTokUserIE(TikTokBaseIE): cursor = old_cursor - 7 * 86_400_000 # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'): - break + return - def _get_sec_uid(self, user_url, user_name, msg): + # User directly passed sec_uid via prefix URL, bypassing our private account detection + if not user_name and not seen_ids: + self.raise_login_required( + 'This user\'s account is likely private. 
Log into an account that has access') + + def _extract_sec_uid_from_embed(self, user_name): webpage = self._download_webpage( - user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'}, - note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or '' - return (traverse_obj(self._get_universal_data(webpage, user_name), - ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) - or traverse_obj(self._get_sigi_state(webpage, user_name), - ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}), - ('UserModule', 'users', ..., 'secUid', {str}, any))) + f'https://www.tiktok.com/embed/@{user_name}', user_name, + 'Downloading user embed page', errnote=False, fatal=False) + if not webpage: + self.report_warning('This user\'s account is either private or has embedding disabled') + return None + + data = traverse_obj(self._search_json( + r']+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', + webpage, 'data', user_name, default={}), + ('source', 'data', f'/embed/@{user_name}', {dict})) + + for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})): + webpage_url = self._create_url(user_name, aweme_id) + video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False) + sec_uid = self._parse_aweme_video_web( + video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id') + if sec_uid: + return sec_uid + + return None def _real_extract(self, url): user_name, sec_uid = self._match_id(url), None if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name): user_name, sec_uid = None, mobj.group(0) else: - sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user') - or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live')) - - if not sec_uid: webpage = self._download_webpage( - f'https://www.tiktok.com/embed/@{user_name}', user_name, - note='Downloading user embed page', fatal=False) or '' - data = traverse_obj(self._search_json( - 
r']+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', - webpage, 'data', user_name, default={}), - ('source', 'data', f'/embed/@{user_name}', {dict})) - - for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})): - webpage_url = self._create_url(user_name, aweme_id) - video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False) - sec_uid = self._parse_aweme_video_web( - video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id') - if sec_uid: - break + self._UPLOADER_URL_FORMAT % user_name, user_name, + 'Downloading user webpage', 'Unable to download user webpage', + fatal=False, headers={'User-Agent': 'Mozilla/5.0'}) or '' + detail = traverse_obj( + self._get_universal_data(webpage, user_name), ('webapp.user-detail', {dict})) or {} + if detail.get('statusCode') == 10222: + self.raise_login_required( + 'This user\'s account is private. Log into an account that has access') + sec_uid = traverse_obj(detail, ( + 'userInfo', 'user', 'secUid', {str})) or self._extract_sec_uid_from_embed(user_name) if not sec_uid: raise ExtractorError( From 404bd889d0e0b62ad72b7281e3fefdc0497080b3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Aug 2025 18:02:04 -0500 Subject: [PATCH 11/11] [ie/weibo] Support more URLs and --no-playlist (#14035) Authored by: bashonly --- yt_dlp/extractor/weibo.py | 66 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index d5210850cc..dc8a2cd753 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, make_archive_id, mimetype2ext, + parse_qs, parse_resolution, str_or_none, strip_jsonp, @@ -209,7 +210,11 @@ class WeiboIE(WeiboBaseIE): class WeiboVideoIE(WeiboBaseIE): - _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P\d+:(?:[\da-f]{32}|\d{16,}))' + _VIDEO_ID_RE = 
r'\d+:(?:[\da-f]{32}|\d{16,})' + _VALID_URL = [ + fr'https?://(?:www\.)?weibo\.com/tv/show/(?P{_VIDEO_ID_RE})', + fr'https?://video\.weibo\.com/show/?\?(?:[^#]+&)?fid=(?P{_VIDEO_ID_RE})', + ] _TESTS = [{ 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow', 'info_dict': { @@ -252,6 +257,28 @@ class WeiboVideoIE(WeiboBaseIE): 'upload_date': '20171226', '_old_archive_ids': ['weibomobile 4189191225395228'], }, + }, { + 'url': 'https://video.weibo.com/show?fid=1034:4967272104787984', + 'info_dict': { + 'id': '4967273022359838', + 'ext': 'mp4', + 'display_id': 'Nse4S9TTU', + 'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手]', + 'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手]', + 'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B', + 'uploader': '张婧仪工作室', + 'uploader_id': '7610808848', + 'uploader_url': 'https://weibo.com/u/7610808848', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'duration': 85, + 'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg', + 'tags': ['婧仪的相册集'], + 'timestamp': 1699773545, + 'upload_date': '20231112', + '_old_archive_ids': ['weibomobile 4967273022359838'], + }, }] def _real_extract(self, url): @@ -275,6 +302,38 @@ class WeiboUserIE(WeiboBaseIE): 'uploader': '萧影殿下', }, 'playlist_mincount': 195, + }, { + 'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838', + 'info_dict': { + 'id': '7610808848', + 'title': '张婧仪工作室的视频', + 'description': '张婧仪工作室的全部视频', + 'uploader': '张婧仪工作室', + }, + 'playlist_mincount': 61, + }, { + 'url': 'https://weibo.com/u/7610808848?tabtype=newVideo&layerid=4967273022359838', + 'info_dict': { + 'id': '4967273022359838', + 'ext': 'mp4', + 'display_id': 'Nse4S9TTU', + 'title': '#张婧仪[超话]#📸#婧仪的相册集#  早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手]', + 'alt_title': '#张婧仪[超话]#📸#婧仪的相册集# 
\n早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手]', + 'description': '#张婧仪[超话]#📸#婧仪的相册集# \n早收工的一天,小张@张婧仪 变身可可爱爱小导游,来次说走就走的泉州City Walk[举手] http://t.cn/A6WTpbEu \u200B\u200B\u200B', + 'uploader': '张婧仪工作室', + 'uploader_id': '7610808848', + 'uploader_url': 'https://weibo.com/u/7610808848', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'duration': 85, + 'thumbnail': 'https://wx2.sinaimg.cn/orj480/008j4b3qly1hjsce01gnqj30u00gvwf8.jpg', + 'tags': ['婧仪的相册集'], + 'timestamp': 1699773545, + 'upload_date': '20231112', + '_old_archive_ids': ['weibomobile 4967273022359838'], + }, + 'params': {'noplaylist': True}, }] def _fetch_page(self, uid, cursor=0, page=1): @@ -295,6 +354,11 @@ class WeiboUserIE(WeiboBaseIE): def _real_extract(self, url): uid = self._match_id(url) + params = {k: v[-1] for k, v in parse_qs(url).items()} + video_id = params.get('layerid') if params.get('tabtype') == 'newVideo' else None + if not self._yes_playlist(uid, video_id): + return self.url_result(f'https://weibo.com/{uid}/{video_id}', WeiboIE, video_id) + first_page = self._fetch_page(uid) uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False) metainfo = {