diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8012ebb8cd..e523154c41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -242,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-13 + runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -261,6 +261,8 @@ jobs: - name: Install Requirements run: | brew install coreutils + # We need to use system Python in order to roll our own universal2 curl_cffi wheel + brew uninstall --ignore-dependencies python3 python3 -m venv ~/yt-dlp-build-venv source ~/yt-dlp-build-venv/bin/activate python3 devscripts/install_deps.py -o --include build diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index dd2c6f481e..86036989c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -37,7 +37,7 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.9 is in quick-test - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest @@ -49,7 +49,7 @@ jobs: - os: windows-latest python-version: '3.13' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 6849fba9b6..594a664c9c 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.9' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml index 203172e0b9..42c65db353 100644 --- a/.github/workflows/signature-tests.yml +++ b/.github/workflows/signature-tests.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7b0f1210..8822907b79 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,7 +126,7 @@ By sharing an account with anyone, you agree to bear all risks associated with i While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. -- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password before sharing the account to something random. - Change the password after receiving the account back. ### Is the website primarily used for piracy? @@ -272,7 +272,7 @@ After you have ensured this site is distributing its content legally, you can fo You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ba23b66dc5..f20b4ce172 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -784,3 +784,12 @@ eason1478 ceandreasen chauhantirth helpimnotdrowning +adamralph +averageFOSSenjoyer +bubo +flanter21 +Georift +moonshinerd +R0hanW +ShockedPlot7560 +swayll diff --git a/Changelog.md b/Changelog.md index 5a5c18cf34..7205b95aa3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,97 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.07.21 + +#### Important changes +- **Default behaviour changed from `--mtime` to `--no-mtime`** +yt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780) +- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56) + - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped + +#### Core changes +- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6)) +- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly) +- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K) +- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev) +- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly) +- **jsinterp** + - [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16)) + - [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **utils** + - `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly) + - `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080) + - `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec)) +- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) ([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K) +- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift) +- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly) +- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas) +- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts) +- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly) + - [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly) +- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21) +- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo) +- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly) +- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Fix support for free accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth) + - [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d)) +- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka) +- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll) +- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560) +- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly) +- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080) +- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317) +- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly) +- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW) +- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev) +- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick) +- **skeb**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080) +- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly) +- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll) +- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly) +- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer) +- **vimeo** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly) + - [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz) + - [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly) + - [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz) + - [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly) + - tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **hls**: [Do not fall back to ffmpeg when native is required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler** + - requests + - [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly) + - [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102)) + +#### Misc. changes +- **cleanup** + - [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev) +- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick) +- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + ### 2025.06.30 #### Core changes diff --git a/README.md b/README.md index 26a27ce7e4..1f8c7936e4 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES -Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly. yt-dlp [OPTIONS] [--] URL [URL...] -`Ctrl+F` is your friend :D +Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords @@ -640,9 +640,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-part Do not use .part files - write directly into output file --mtime Use the Last-modified header to set the file - modification time (default) + modification time --no-mtime Do not use the Last-modified header to set - the file modification time + the file modification time (default) --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file @@ -1903,6 +1903,10 @@ The following extractors use this feature: #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) +#### vimeo +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index c2f6511210..0597f602d0 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -62,16 +62,22 @@ def parse_options(): def exe(onedir): """@returns (name, path)""" + platform_name, machine, extension = { + 'win32': (None, MACHINE, '.exe'), + 'darwin': ('macos', None, None), + }.get(OS_NAME, (OS_NAME, MACHINE, None)) + name = '_'.join(filter(None, ( 'yt-dlp', - {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE, + platform_name, + machine, ))) + return name, ''.join(filter(None, ( 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe', + extension, ))) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d7296bf309..c22ea94bfc 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -262,5 +262,15 @@ { "action": "remove", "when": "500761e41acb96953a5064e951d41d190c287e46" + }, + { + "action": "add", + "when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3", + "short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)" + }, + { + "action": "add", + "when": "959ac99e98c3215437e573c22d64be42d361e863", + "short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped" } ] diff --git a/supportedsites.md b/supportedsites.md index 8e48135d22..3e0bef4bcf 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -133,7 +133,6 @@ The only reliable way to check if a site is supported is to try it. - **BaiduVideo**: 百度视频 - **BanBye** - **BanByeChannel** - - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** @@ -157,7 +156,6 @@ The only reliable way to check if a site is supported is to try it. - **Beeg** - **BehindKink**: (**Currently broken**) - **Bellator** - - **BellMedia** - **BerufeTV** - **Bet**: (**Currently broken**) - **bfi:player**: (**Currently broken**) @@ -197,6 +195,7 @@ The only reliable way to check if a site is supported is to try it. - **BitChute** - **BitChuteChannel** - **BlackboardCollaborate** + - **BlackboardCollaborateLaunch** - **BleacherReport**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**) - **blerp** @@ -225,6 +224,7 @@ The only reliable way to check if a site is supported is to try it. - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BTVPlus** - **Bundesliga** - **Bundestag** - **BunnyCdn** @@ -317,7 +317,6 @@ The only reliable way to check if a site is supported is to try it. - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** @@ -652,7 +651,6 @@ The only reliable way to check if a site is supported is to try it. - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -723,9 +721,6 @@ The only reliable way to check if a site is supported is to try it. - **life:embed** - **likee** - **likee:user** - - **limelight** - - **limelight:channel** - - **limelight:channel_list** - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") @@ -807,6 +802,7 @@ The only reliable way to check if a site is supported is to try it. - **minds:channel** - **minds:group** - **Minoto** + - **mir24.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -817,6 +813,8 @@ The only reliable way to check if a site is supported is to try it. - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** + - **Mixlr** + - **MixlrRecoring** - **MLB** - **MLBArticle** - **MLBTV**: [*mlb*](## "netrc machine") @@ -973,7 +971,6 @@ The only reliable way to check if a site is supported is to try it. - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - - **Noovo** - **NOSNLArticle** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -1097,6 +1094,7 @@ The only reliable way to check if a site is supported is to try it. - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** + - **PlayerFm** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") @@ -1472,11 +1470,12 @@ The only reliable way to check if a site is supported is to try it. - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TF1** - - **TFO** + - **TFO**: (**Currently broken**) - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** + - **TheHighWire** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1544,8 +1543,8 @@ The only reliable way to check if a site is supported is to try it. - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - - **tv5unis** - - **tv5unis:video** + - **tv5unis**: (**Currently broken**) + - **tv5unis:video**: (**Currently broken**) - **tv8.it** - **tv8.it:live**: TV8 Live - **tv8.it:playlist**: TV8 Playlist @@ -1600,6 +1599,7 @@ The only reliable way to check if a site is supported is to try it. - **UlizaPortal**: ulizaportal.jp - **umg:de**: Universal Music Deutschland - **Unistra** + - **UnitedNationsWebTv** - **Unity**: (**Currently broken**) - **uol.com.br** - **uplynk** diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f7..40dd05e136 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1959,6 +1959,37 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nextjs_v13_data(self): + HTML = R''' + + + + + + + + ''' + EXPECTED = { + '18': { + 'foo': 'bar', + }, + '16': { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, + '19': { + 'duplicated_field_name': {'x': 1}, + }, + '20': { + 'duplicated_field_name': {'y': 2}, + }, + } + self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {}) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {}) + def test_search_nuxt_json(self): HTML_TMPL = '' VALID_DATA = ''' diff --git a/test/test_compat.py b/test/test_compat.py index b1cc2a8187..3aa9c0c518 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -21,9 +21,6 @@ class TestCompat(unittest.TestCase): with self.assertWarns(DeprecationWarning): _ = compat.compat_basestring - with self.assertWarns(DeprecationWarning): - _ = compat.WINDOWS_VT_MODE - self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): diff --git a/test/test_download.py b/test/test_download.py index c7842735c2..1714cb52ec 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -66,10 +66,6 @@ tests_counter = collections.defaultdict(collections.Counter) @is_download_test class TestDownload(unittest.TestCase): - # Parallel testing in nosetests. See - # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html - _multiprocess_shared_ = True - maxDiff = None COMPLETED_TESTS = {} diff --git a/test/test_utils.py b/test/test_utils.py index aedb565ec1..44747efda6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1373,6 +1373,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index abf6507b30..37d6eae4c5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -36,6 +36,7 @@ from .extractor.openload import PhantomJSwrapper from .globals import ( IN_CLI, LAZY_EXTRACTORS, + WINDOWS_VT_MODE, plugin_ies, plugin_ies_overrides, plugin_pps, @@ -52,7 +53,7 @@ from .networking.exceptions import ( SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -529,6 +530,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube @@ -2195,7 +2197,7 @@ class YoutubeDL: return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2208,6 +2210,9 @@ class YoutubeDL: continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2218,12 +2223,18 @@ class YoutubeDL: os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' + if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2949,7 +2960,7 @@ class YoutubeDL: ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them @@ -3222,6 +3233,7 @@ class YoutubeDL: } else: params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -3697,6 +3709,8 @@ class YoutubeDL: return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj elif callable(obj): @@ -4029,8 +4043,7 @@ class YoutubeDL: if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .utils import WINDOWS_VT_MODE # Must be imported locally - additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret @@ -4176,6 +4189,31 @@ class YoutubeDL: for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def _unavailable_targets_message(requested_targets, note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dae2c14592..2f3e35d4a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 5675445ace..459a4b7de0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1335,7 +1335,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): if len(cookie_list) != self._ENTRY_LEN: raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): + if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at): raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 8f575ece4c..4ad0b6a0f6 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -105,7 +105,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb9303f8a1..7bc70a51a2 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -495,3 +495,14 @@ class FileDownloader: exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ee73ac043e..65ed83991b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -572,7 +572,21 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] + url = fmt['url'] + if self.params.get('enable_file_urls') and url.startswith('file:'): + # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs, + # so only local segments can be read unless we also include 'http,https,tcp,tls' + args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls'] + # ffmpeg incorrectly handles 'file:' URLs by only removing the + # 'file:' prefix and treating the rest as if it's a normal filepath. + # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs: + # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:' + # - On *nix, replace 'file://localhost/' with 'file:/' + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781 + # https://trac.ffmpeg.org/ticket/2702 + url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url) + + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2256305785..58cfbbf163 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -205,7 +205,7 @@ class HlsFD(FragmentFD): line = line.strip() if line: if not line.startswith('#'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if ad_frag_next: continue @@ -231,7 +231,7 @@ class HlsFD(FragmentFD): byte_range = {} elif line.startswith('#EXT-X-MAP'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 90bfcaf552..c388deb7ea 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -27,6 +27,10 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +113,7 @@ class HttpFD(FileDownloader): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 84da570b0a..3eea0cdf6b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -201,7 +201,6 @@ from .banbye import ( BanByeChannelIE, BanByeIE, ) -from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, @@ -229,7 +228,6 @@ from .beatbump import ( from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE @@ -275,7 +273,10 @@ from .bitchute import ( BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, @@ -309,6 +310,7 @@ from .brilliantpala import ( BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) +from .btvplus import BTVPlusIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .bunnycdn import BunnyCdnIE @@ -446,7 +448,6 @@ from .cspan import ( CSpanIE, ) from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( @@ -570,10 +571,6 @@ from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( @@ -639,6 +636,7 @@ from .fancode import ( FancodeVodIE, ) from .fathom import FathomIE +from .faulio import FaulioLiveIE from .faz import FazIE from .fc2 import ( FC2IE, @@ -928,7 +926,6 @@ from .jiosaavn import ( JioSaavnSongIE, ) from .joj import JojIE -from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( @@ -1031,11 +1028,6 @@ from .likee import ( LikeeIE, LikeeUserIE, ) -from .limelight import ( - LimelightChannelIE, - LimelightChannelListIE, - LimelightMediaIE, -) from .linkedin import ( LinkedInEventsIE, LinkedInIE, @@ -1168,6 +1160,10 @@ from .mixcloud import ( MixcloudPlaylistIE, MixcloudUserIE, ) +from .mixlr import ( + MixlrIE, + MixlrRecoringIE, +) from .mlb import ( MLBIE, MLBTVIE, @@ -1378,7 +1374,6 @@ from .nobelprize import NobelPrizeIE from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, @@ -1559,6 +1554,7 @@ from .platzi import ( PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE @@ -1569,6 +1565,7 @@ from .pluralsight import ( ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, @@ -1784,6 +1781,7 @@ from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, + RTVEProgramIE, RTVETelevisionIE, ) from .rtvs import RTVSIE @@ -2097,6 +2095,7 @@ from .theguardian import ( TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( @@ -2166,7 +2165,6 @@ from .trtworld import TrtWorldIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, @@ -2237,6 +2235,7 @@ from .tvplay import ( from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE @@ -2285,6 +2284,7 @@ from .uliza import ( ) from .umg import UMGDeIE from .unistra import UnistraIE +from .unitednations import UnitedNationsWebTvIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d9340..eb45734ec0 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 4608e5c13d..997e1b92cb 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -84,9 +84,10 @@ class AdobeTVBaseIE(InfoExtractor): class AdobeTVEmbedIE(AdobeTVBaseIE): + _WORKING = False IE_NAME = 'adobetv:embed' _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://tv.adobe.com/embed/22/4153', 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', 'info_dict': { @@ -94,12 +95,12 @@ class AdobeTVEmbedIE(AdobeTVBaseIE): 'ext': 'flv', 'title': 'Creating Graphics Optimized for BlackBerry', 'description': 'md5:eac6e8dced38bdaae51cd94447927459', - 'thumbnail': r're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20091109', 'duration': 377, 'view_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -110,10 +111,10 @@ class AdobeTVEmbedIE(AdobeTVBaseIE): class AdobeTVIE(AdobeTVBaseIE): + _WORKING = False IE_NAME = 'adobetv' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', 'info_dict': { @@ -121,12 +122,12 @@ class AdobeTVIE(AdobeTVBaseIE): 'ext': 'mp4', 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', - 'thumbnail': r're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20110914', 'duration': 60, 'view_count': int, }, - } + }] def _real_extract(self, url): language, show_urlname, urlname = self._match_valid_url(url).groups() @@ -159,10 +160,10 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): class AdobeTVShowIE(AdobeTVPlaylistBaseIE): + _WORKING = False IE_NAME = 'adobetv:show' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', 'info_dict': { 'id': '36', @@ -170,7 +171,7 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE): 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', }, 'playlist_mincount': 136, - } + }] _RESOURCE = 'episode' _process_data = AdobeTVBaseIE._parse_video_data @@ -195,16 +196,16 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE): class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): + _WORKING = False IE_NAME = 'adobetv:channel' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/channel/development', 'info_dict': { 'id': 'development', }, 'playlist_mincount': 96, - } + }] _RESOURCE = 'show' def _process_data(self, show_data): @@ -231,8 +232,7 @@ class AdobeTVVideoIE(AdobeTVBaseIE): IE_NAME = 'adobetv:video' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]'] - - _TEST = { + _TESTS = [{ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners 'url': 'https://video.tv.adobe.com/v/2456/', 'md5': '43662b577c018ad707a63766462b1e87', @@ -242,8 +242,20 @@ class AdobeTVVideoIE(AdobeTVBaseIE): 'title': 'New experience with Acrobat DC', 'description': 'New experience with Acrobat DC', 'duration': 248.667, + 'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg', + }, + }] + _WEBPAGE_TESTS = [{ + # FIXME: Invalid extension + 'url': 'https://www.adobe.com/learn/acrobat/web/customize-toolbar', + 'info_dict': { + 'id': '3463980', + 'ext': 'm3u8', + 'title': 'Adobe Acrobat: How to Customize the Toolbar for Faster PDF Editing', + 'description': 'md5:94368ab95ae24f9c1bee0cb346e03dc3', + 'duration': 97.557, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index e5c922b41f..a4a5f409ec 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?Pmovie|special)s/[^/?#]+(?P/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], @@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Mountain Men', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', 'episode': 'Inlawful Entry', @@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Duck Dynasty', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', + }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', @@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index fed597042a..db82b56187 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -11,12 +11,11 @@ class APAIE(InfoExtractor): _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1'] _TESTS = [{ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'md5': '2b12292faeb0a7d930c778c7a5b4759b', 'info_dict': { 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', 'ext': 'mp4', 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', }, }, { 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78', @@ -28,6 +27,15 @@ class APAIE(InfoExtractor): 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.vol.at/blue-man-group/5593454', + 'info_dict': { + 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'ext': 'mp4', + 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', + }, + }] def _real_extract(self, url): mobj = self._match_valid_url(url) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2849d9fd5b..1864ddbfd9 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -16,6 +16,7 @@ from ..utils import ( dict_get, extract_attributes, get_element_by_id, + get_element_text_and_html_by_tag, int_or_none, join_nonempty, js_to_json, @@ -32,7 +33,6 @@ from ..utils import ( unified_timestamp, url_or_none, urlhandle_detect_ext, - variadic, ) @@ -72,6 +72,7 @@ class ArchiveOrgIE(InfoExtractor): 'display_id': 'Cops-v2.mp4', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 1091.96, + 'track': 'Cops-v2', }, }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', @@ -86,6 +87,7 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 59.77, 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg', + 'track': 'Commercial-JFK1960ElectionAdCampaignJingle', }, }, { 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg', @@ -102,6 +104,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 59.51, 'license': 'http://creativecommons.org/licenses/publicdomain/', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', + 'track': 'Commercial-Nixon1960ElectionAdToughonDefense', }, }, { 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16', @@ -182,6 +185,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 130.46, 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg', 'display_id': 'irelandthemakingofarepublicreel1_01.mov', + 'track': 'irelandthemakingofarepublicreel1 01', }, }, { 'md5': '67335ee3b23a0da930841981c1e79b02', @@ -192,6 +196,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel1_02.mov', 'display_id': 'irelandthemakingofarepublicreel1_02.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg', + 'track': 'irelandthemakingofarepublicreel1 02', }, }, { 'md5': 'e470e86787893603f4a341a16c281eb5', @@ -202,6 +207,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel2.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', + 'track': 'irelandthemakingofarepublicreel2', }, }, ], @@ -225,19 +231,29 @@ class ArchiveOrgIE(InfoExtractor): 'release_date': '19950402', 'timestamp': 1084927901, }, + }, { + # metadata['metadata']['description'] is a list of strings instead of str + 'url': 'https://archive.org/details/pra-KZ1908.02', + 'info_dict': { + 'id': 'pra-KZ1908.02', + 'ext': 'mp3', + 'display_id': 'KZ1908.02_01.wav', + 'title': 'Crips and Bloods speak about gang life', + 'description': 'md5:2b56b35ff021311e3554b47a285e70b3', + 'uploader': 'jake@archive.org', + 'duration': 1733.74, + 'track': 'KZ1908.02 01', + 'track_number': 1, + 'timestamp': 1336026026, + 'upload_date': '20120503', + 'release_year': 1992, + }, }] @staticmethod def _playlist_data(webpage): - element = re.findall(r'''(?xs) - - ''', webpage)[0] - - return json.loads(extract_attributes(element)['value']) + element = get_element_text_and_html_by_tag('play-av', webpage)[1] + return json.loads(extract_attributes(element)['playlist']) def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) @@ -274,34 +290,40 @@ class ArchiveOrgIE(InfoExtractor): m = metadata['metadata'] identifier = m['identifier'] - info = { + info = traverse_obj(m, { + 'title': ('title', {str}), + 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any), + 'uploader': (('uploader', 'adder'), {str}, any), + 'creators': ('creator', (None, ...), {str}, filter, all, filter), + 'license': ('licenseurl', {url_or_none}), + 'release_date': ('date', {unified_strdate}), + 'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any), + 'location': ('venue', {str}), + 'release_year': ('year', {int_or_none}), + }) + info.update({ 'id': identifier, - 'title': m['title'], - 'description': clean_html(m.get('description')), - 'uploader': dict_get(m, ['uploader', 'adder']), - 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})), - 'license': m.get('licenseurl'), - 'release_date': unified_strdate(m.get('date')), - 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), 'webpage_url': f'https://archive.org/details/{identifier}', - 'location': m.get('venue'), - 'release_year': int_or_none(m.get('year'))} + }) for f in metadata['files']: if f['name'] in entries: entries[f['name']] = merge_dicts(entries[f['name']], { 'id': identifier + '/' + f['name'], - 'title': f.get('title') or f['name'], - 'display_id': f['name'], - 'description': clean_html(f.get('description')), - 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})), - 'duration': parse_duration(f.get('length')), - 'track_number': int_or_none(f.get('track')), - 'album': f.get('album'), - 'discnumber': int_or_none(f.get('disc')), - 'release_year': int_or_none(f.get('year'))}) + **traverse_obj(f, { + 'title': (('title', 'name'), {str}, any), + 'display_id': ('name', {str}), + 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any), + 'creators': ('creator', (None, ...), {str}, filter, all, filter), + 'duration': ('length', {parse_duration}), + 'track_number': ('track', {int_or_none}), + 'album': ('album', {str}), + 'discnumber': ('disc', {int_or_none}), + 'release_year': ('year', {int_or_none}), + }), + }) entry = entries[f['name']] - elif traverse_obj(f, 'original', expected_type=str) in entries: + elif traverse_obj(f, ('original', {str})) in entries: entry = entries[f['original']] else: continue diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 8da9bc4ccb..903f9c739a 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -62,6 +62,20 @@ class ArcPublishingIE(InfoExtractor): 'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.uppermichiganssource.com/2025/07/18/scattered-showers-storms-bring-heavy-rain-potential/', + 'info_dict': { + 'id': '508116f7-e999-48db-b7c2-60a04842679b', + 'ext': 'mp4', + 'title': 'Scattered showers & storms bring heavy rain potential', + 'description': 'Scattered showers & storms bring heavy rain potential', + 'duration': 2016, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1752881287, + 'upload_date': '20250718', + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + }] _POWA_DEFAULTS = [ (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'), ([ diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 142d4b066b..3f17da463d 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -51,8 +51,8 @@ class ArteTVIE(ArteTVBaseIE): 'id': '109067-000-A', 'ext': 'mp4', 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', 'timestamp': 1713927600, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530', 'duration': 7599, 'title': 'La loi de Téhéran', 'upload_date': '20240424', @@ -62,6 +62,7 @@ class ArteTVIE(ArteTVBaseIE): 'fr-forced': 'mincount:1', }, }, + 'skip': 'Invalid URL', }, { 'note': 'age-restricted', 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', @@ -69,9 +70,9 @@ class ArteTVIE(ArteTVBaseIE): 'id': '006785-000-A', 'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba', 'title': 'The Element of Crime', + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', 'timestamp': 1696111200, 'duration': 5849, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530', 'upload_date': '20230930', 'ext': 'mp4', }, @@ -252,6 +253,30 @@ class ArteTVEmbedIE(InfoExtractor): 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://timesofmalta.com/article/watch-sunken-warships-north-sea-arte.1108358', + 'info_dict': { + 'id': '110288-000-A', + 'ext': 'mp4', + 'title': 'Danger on the Seabed', + 'alt_title': 'Sunken Warships in the North Sea', + 'description': 'md5:a2c84cbad37d280bddb6484087120add', + 'duration': 3148, + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', + 'timestamp': 1741686820, + 'upload_date': '20250311', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + # FIXME: Embed detection + 'url': 'https://www.eurockeennes.fr/en-live/', + 'info_dict': { + 'id': 'en-live', + 'title': 'Les Eurocks en live | Les Eurockéennes de Belfort – 3-4-5-6 juillet 2025 sur la Presqu'Île du Malsaucy', + }, + 'playlist_count': 4, + }] def _real_extract(self, url): qs = parse_qs(url) @@ -304,9 +329,9 @@ class ArteTVCategoryIE(ArteTVBaseIE): 'info_dict': { 'id': 'politics-and-society', 'title': 'Politics and society', - 'description': 'Investigative documentary series, geopolitical analysis, and international commentary', + 'description': 'Watch documentaries and reportage about politics, society and current affairs.', }, - 'playlist_mincount': 13, + 'playlist_mincount': 3, }] @classmethod diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py deleted file mode 100644 index d7fcf44bd9..0000000000 --- a/yt_dlp/extractor/bandaichannel.py +++ /dev/null @@ -1,33 +0,0 @@ -from .brightcove import BrightcoveNewBaseIE -from ..utils import extract_attributes - - -class BandaiChannelIE(BrightcoveNewBaseIE): - IE_NAME = 'bandaichannel' - _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' - _TESTS = [{ - 'url': 'https://www.b-ch.com/titles/514/001', - 'md5': 'a0f2d787baa5729bed71108257f613a4', - 'info_dict': { - 'id': '6128044564001', - 'ext': 'mp4', - 'title': 'メタルファイターMIKU 第1話', - 'timestamp': 1580354056, - 'uploader_id': '5797077852001', - 'upload_date': '20200130', - 'duration': 1387.733, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - attrs = extract_attributes(self._search_regex( - r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player')) - bc = self._download_json( - 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'], - video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc'] - return self._parse_brightcove_metadata(bc, bc['id']) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 939c2800e6..0a8f88fa8c 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, + clean_html, extract_attributes, float_or_none, int_or_none, @@ -19,7 +20,7 @@ from ..utils import ( url_or_none, urljoin, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class BandcampIE(InfoExtractor): @@ -35,14 +36,12 @@ class BandcampIE(InfoExtractor): 'duration': 9.8485, 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1354224127, 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', - 'album_artist': 'youtube-dl "\'/\\ä↭', 'track_id': '1812978515', - 'artist': 'youtube-dl "\'/\\ä↭', 'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_id': 'youtube-dl', - 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'artists': ['youtube-dl "\'/\\ä↭'], 'album_artists': ['youtube-dl "\'/\\ä↭'], }, @@ -53,10 +52,9 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '2650410135', 'ext': 'm4a', - 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1396508491, 'upload_date': '20140403', 'release_timestamp': 1396483200, @@ -65,11 +63,12 @@ class BandcampIE(InfoExtractor): 'track': 'Lanius (Battle)', 'track_number': 1, 'track_id': '2650410135', - 'artist': 'Ben Prunty', - 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', 'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_id': 'benprunty', + 'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'], + 'artists': ['Ben Prunty'], + 'album_artists': ['Ben Prunty'], }, }, { # no free download, mp3 128 @@ -79,8 +78,8 @@ class BandcampIE(InfoExtractor): 'id': '2584466013', 'ext': 'mp3', 'title': 'Mastodon - Hail to Fire', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Mastodon', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1322005399, 'upload_date': '20111122', 'release_timestamp': 1076112000, @@ -89,11 +88,12 @@ class BandcampIE(InfoExtractor): 'track': 'Hail to Fire', 'track_number': 5, 'track_id': '2584466013', - 'artist': 'Mastodon', - 'album_artist': 'Mastodon', 'album': 'Call of the Mastodon', 'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_id': 'relapsealumni', + 'tags': ['Philadelphia'], + 'artists': ['Mastodon'], + 'album_artists': ['Mastodon'], }, }, { # track from compilation album (artist/album_artist difference) @@ -103,8 +103,8 @@ class BandcampIE(InfoExtractor): 'id': '1978174799', 'ext': 'mp3', 'title': 'submerse - submerse - Safehouse', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'submerse', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1480779297, 'upload_date': '20161203', 'release_timestamp': 1481068800, @@ -113,11 +113,36 @@ class BandcampIE(InfoExtractor): 'track': 'submerse - Safehouse', 'track_number': 3, 'track_id': '1978174799', - 'artist': 'submerse', - 'album_artist': 'Diskotopia', 'album': 'DSK F/W 2016-2017 Free Compilation', 'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_id': 'diskotopia', + 'tags': ['Japan'], + 'artists': ['submerse'], + 'album_artists': ['Diskotopia'], + }, + }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://www.punknews.org/article/85809/stay-inside-super-sonic', + 'info_dict': { + 'id': '2475540375', + 'ext': 'mp3', + 'title': 'Stay Inside - Super Sonic', + 'album': 'Lunger', + 'album_artists': ['Stay Inside'], + 'artists': ['Stay Inside'], + 'duration': 166.157, + 'release_date': '20251003', + 'release_timestamp': 1759449600.0, + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', + 'timestamp': 1749473029.0, + 'track': 'Super Sonic', + 'track_id': '2475540375', + 'track_number': 3, + 'upload_date': '20250609', + 'uploader': 'Stay Inside', + 'uploader_id': 'stayinside', + 'uploader_url': 'https://stayinside.bandcamp.com', }, }] @@ -252,6 +277,7 @@ class BandcampIE(InfoExtractor): 'album': embed.get('album_title'), 'album_artist': album_artist, 'formats': formats, + 'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})), } @@ -268,10 +294,10 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'id': '1353101989', 'ext': 'mp3', 'title': 'Blazo - Intro', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - 'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg', 'album_artists': ['Blazo'], 'uploader_url': 'https://blazo.bandcamp.com', 'release_date': '20110727', @@ -291,6 +317,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'id': '38097443', 'ext': 'mp3', 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', @@ -304,7 +331,6 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', 'album_artists': ['Blazo'], 'artists': ['Blazo'], - 'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg', 'release_timestamp': 1311724800.0, }, }, diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py deleted file mode 100644 index ac45dd4779..0000000000 --- a/yt_dlp/extractor/bellmedia.py +++ /dev/null @@ -1,91 +0,0 @@ -from .common import InfoExtractor - - -class BellMediaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?P - (?: - ctv| - tsn| - bnn(?:bloomberg)?| - thecomedynetwork| - discovery| - discoveryvelocity| - sciencechannel| - investigationdiscovery| - animalplanet| - bravo| - mtv| - space| - etalk| - marilyn - )\.ca| - (?:much|cp24)\.com - )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' - _TESTS = [{ - 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', - 'md5': '3e5b8e38370741d5089da79161646635', - 'info_dict': { - 'id': '1403070', - 'ext': 'flv', - 'title': 'David Cockfield\'s Top Picks', - 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', - 'upload_date': '20180525', - 'timestamp': 1527288600, - 'season_id': '73997', - 'season': '2018', - 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', - 'tags': [], - 'categories': ['ETFs'], - 'season_number': 8, - 'duration': 272.038, - 'series': 'Market Call Tonight', - }, - }, { - 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', - 'only_matching': True, - }, { - 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', - 'only_matching': True, - }, { - 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', - 'only_matching': True, - }, { - 'url': 'http://www.etalk.ca/video?videoid=663455', - 'only_matching': True, - }, { - 'url': 'https://www.cp24.com/video?clipId=1982548', - 'only_matching': True, - }] - _DOMAINS = { - 'thecomedynetwork': 'comedy', - 'discoveryvelocity': 'discvel', - 'sciencechannel': 'discsci', - 'investigationdiscovery': 'invdisc', - 'animalplanet': 'aniplan', - 'etalk': 'ctv', - 'bnnbloomberg': 'bnn', - 'marilyn': 'ctv_marilyn', - } - - def _real_extract(self, url): - domain, video_id = self._match_valid_url(url).groups() - domain = domain.split('.')[0] - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', - 'ie_key': 'NineCNineMedia', - } diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc72..d00ac63176 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -353,7 +353,7 @@ class BiliBiliIE(BilibiliBaseIE): 'id': 'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的', }, - 'playlist_count': 18, + 'playlist_count': 23, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', @@ -373,6 +373,7 @@ class BiliBiliIE(BilibiliBaseIE): '_old_archive_ids': ['bilibili 498159642_part1'], }, }], + 'params': {'playlist_items': '2'}, }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -899,13 +900,26 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, headers=headers)) + # play_info can be structured in at least three different ways, e.g.: + # 1.) play_info['result']['video_info'] and play_info['code'] + # 2.) play_info['raw']['data']['video_info'] and play_info['code'] + # 3.) play_info['data']['result']['video_info'] and play_info['data']['code'] + # So we need to transform any of the above into a common structure + status_code = play_info.get('code') + if 'raw' in play_info: + play_info = play_info['raw'] + if 'data' in play_info: + play_info = play_info['data'] + if status_code is None: + status_code = play_info.get('code') + if 'result' in play_info: + play_info = play_info['result'] + geo_blocked = traverse_obj(play_info, ( - ('result', ('raw', 'data')), 'plugins', - lambda _, v: v['name'] == 'AreaLimitPanel', - 'config', 'is_block', {bool}, any)) - premium_only = play_info.get('code') == -10403 + 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any)) + premium_only = status_code == -10403 - video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {} + video_info = traverse_obj(play_info, ('video_info', {dict})) or {} formats = self.extract_formats(video_info) if not formats: @@ -915,8 +929,8 @@ class BiliBiliBangumiIE(BilibiliBaseIE): self.raise_login_required('This video is for premium members only') if traverse_obj(play_info, (( - ('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' - (('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none' + ('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE' + 'play_video_type', # 'preview' vs 'whole' vs 'none' ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})): self.report_warning( 'Only preview format is available, ' @@ -1002,6 +1016,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1057,6 +1072,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1847,7 +1863,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1564836614, 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', + 'uploader': '十六夜tsukimiつきみぐ', 'view_count': int, }, } @@ -1902,10 +1918,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): 'url': 'https://www.bilibili.com/audio/am10624', 'info_dict': { 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', + 'title': '新曲推荐', 'description': '每天11:00更新,为你推送最新音乐', }, - 'playlist_count': 19, + 'playlist_count': 16, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 535890979b..c14ff1f142 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,16 +1,27 @@ from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + UnsupportedError, + float_or_none, + int_or_none, + join_nonempty, + jwt_decode_hs256, + mimetype2ext, + parse_iso8601, + parse_qs, + url_or_none, +) +from ..utils.traversal import traverse_obj class BlackboardCollaborateIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?P[a-z-]+)\.bbcollab\.com/ + (?P[a-z]+)(?:-lti)?\.bbcollab\.com/ (?: collab/ui/session/playback/load| recording )/ - (?P[^/]+)''' + (?P[^/?#]+)''' _TESTS = [ { 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', @@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor): 'id': '0a633b6a88824deb8c918f470b22b256', 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', 'ext': 'mp4', - 'duration': 1896000, - 'timestamp': 1620331399, + 'duration': 1896, + 'timestamp': 1620333295, 'upload_date': '20210506', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600', + 'md5': '108db6a8f83dcb0c2a07793649581865', + 'info_dict': { + 'id': '4bde2dee104f40289a10f8e554270600', + 'title': 'Meeting - Azerbaycanca erize formasi', + 'ext': 'mp4', + 'duration': 880, + 'timestamp': 1671176868, + 'upload_date': '20221216', + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6', + 'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5', + 'info_dict': { + 'id': 'f83be390ecff46c0bf7dccb9dddcf5f6', + 'title': 'Keynote lecture by Laura Carvalho - recording_1', + 'ext': 'mp4', + 'duration': 5506, + 'timestamp': 1662721705, + 'upload_date': '20220909', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496', + 'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f', + 'info_dict': { + 'id': 'c3e1e7c9e83d4cd9981c93c74888d496', + 'title': 'International Ally User Group - recording_18', + 'ext': 'mp4', + 'duration': 3479, + 'timestamp': 1721919621, + 'upload_date': '20240725', + 'subtitles': { + 'en': 'mincount:1', + 'live_chat': 'mincount:1', + }, }, }, { @@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor): }, ] + def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False): + # Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST + return self._download_json( + join_nonempty(f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/'), + video_id, note or 'Downloading JSON metadata', fatal=fatal, + headers={'Authorization': f'Bearer {token}'} if token else None) + def _real_extract(self, url): mobj = self._match_valid_url(url) region = mobj.group('region') video_id = mobj.group('id') - info = self._download_json( - f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) - duration = info.get('duration') - title = info['name'] - upload_date = info.get('created') - streams = info['streams'] - formats = [{'format_id': k, 'url': url} for k, url in streams.items()] + token = parse_qs(url).get('authToken', [None])[-1] + + video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token') + if video_info: + video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes') + else: + video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True) + video_extra = {} + + formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), { + 'url': 'streamUrl', + 'ext': ('contentType', {mimetype2ext}), + 'aspect_ratio': ('aspectRatio', {float_or_none}), + })) + + if filesize := traverse_obj(video_extra, ('storageSize', {int_or_none})): + for fmt in formats: + fmt['filesize'] = filesize + + subtitles = {} + for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))): + subtitles.setdefault(subs.get('lang') or 'und', []).append({ + 'name': traverse_obj(subs, ('label', {str})), + 'url': subs['url'], + }) + + for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})): + subtitles.setdefault('live_chat', []).append({'url': live_chat_url}) return { - 'duration': duration, + **traverse_obj(video_info, { + 'title': ('name', {str}), + 'timestamp': ('created', {parse_iso8601}), + 'duration': ('duration', {int_or_none(scale=1000)}), + }), 'formats': formats, 'id': video_id, - 'timestamp': parse_iso8601(upload_date), - 'title': title, + 'subtitles': subtitles, } + + +class BlackboardCollaborateLaunchIE(InfoExtractor): + _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P[^/?#]+)' + + _TESTS = [ + { + 'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk', + 'only_matching': True, + }, + { + 'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY', + 'only_matching': True, + }, + { + 'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + token = self._match_id(url) + video_id = jwt_decode_hs256(token)['resourceAccessTicket']['resourceId'] + + redirect_url = self._request_webpage(url, video_id).url + if self.suitable(redirect_url): + raise UnsupportedError(redirect_url) + return self.url_result(redirect_url, BlackboardCollaborateIE, video_id) diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index 1614b6f947..6ff72e70d2 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -19,8 +19,19 @@ class BloggerIE(InfoExtractor): 'id': 'BLOGGER-video-3c740e3a49197e16-796', 'title': 'BLOGGER-video-3c740e3a49197e16-796', 'ext': 'mp4', - 'thumbnail': r're:^https?://.*', 'duration': 76.068, + 'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+', + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-12203', + 'ext': 'mp4', + 'title': 'BLOGGER-video-3c740e3a49197e16-12203', + 'duration': 76.068, + 'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+', }, }] diff --git a/yt_dlp/extractor/btvplus.py b/yt_dlp/extractor/btvplus.py new file mode 100644 index 0000000000..531ace1471 --- /dev/null +++ b/yt_dlp/extractor/btvplus.py @@ -0,0 +1,73 @@ +from .common import InfoExtractor +from ..utils import ( + bug_reports_message, + clean_html, + get_element_by_class, + js_to_json, + mimetype2ext, + strip_or_none, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class BTVPlusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P\d+)' + _TESTS = [{ + 'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g', + 'info_dict': { + 'ext': 'mp4', + 'id': '67271', + 'title': 'bTV Репортерите - 12.07.2025 г.', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55', + 'info_dict': { + 'ext': 'mp4', + 'id': '66942', + 'title': 'Плен - сезон 2, епизод 55', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_url = self._search_regex( + r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]', + webpage, 'player URL') + + player_config = self._download_json( + urljoin('https://btvplus.bg', player_url), video_id)['config'] + + videojs_data = self._search_json( + r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data', + video_id, transform_source=js_to_json) + formats = [] + subtitles = {} + for src in traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src']))): + ext = mimetype2ext(src.get('type')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + self.report_warning(f'Unknown format type {ext}{bug_reports_message()}') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': ( + strip_or_none(self._og_search_title(webpage, default=None)) + or clean_html(get_element_by_class('product-title', webpage))), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + } diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 9e9e89a801..a90fceaacf 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -19,18 +19,16 @@ class CloudflareStreamIE(InfoExtractor): 'id': '31c9291ab41fac05471db4e73aa11717', 'ext': 'mp4', 'title': '31c9291ab41fac05471db4e73aa11717', - 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', - }, - 'params': { - 'skip_download': 'm3u8', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e', 'info_dict': { 'id': '0e8e040aec776862e1d632a699edf59e', 'ext': 'mp4', 'title': '0e8e040aec776862e1d632a699edf59e', - 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', @@ -54,11 +52,21 @@ class CloudflareStreamIE(InfoExtractor): 'id': 'eaef9dea5159cf968be84241b5cedfe7', 'ext': 'mp4', 'title': 'eaef9dea5159cf968be84241b5cedfe7', - 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, 'params': { + 'extractor_args': {'generic': {'impersonate': ['chrome']}}, 'skip_download': 'm3u8', }, + }, { + # FIXME: Embed detection + 'url': 'https://www.cloudflare.com/developer-platform/products/cloudflare-stream/', + 'info_dict': { + 'id': 'e7bd2dd67e0f8860b4ae81e33a966049', + 'ext': 'mp4', + 'title': 'e7bd2dd67e0f8860b4ae81e33a966049', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e806233..4a4b5416d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -38,7 +38,6 @@ from ..networking.exceptions import ( TransportError, network_exceptions, ) -from ..networking.impersonate import ImpersonateTarget from ..utils import ( IDENTITY, JSON_LD_RE, @@ -259,6 +258,11 @@ class InfoExtractor: * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored * iv The IV (as hex) used to decrypt fragments + * impersonate Impersonate target(s). Can be any of the following entities: + * an instance of yt_dlp.networking.impersonate.ImpersonateTarget + * a string in the format of CLIENT[:OS] + * a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances + * a boolean value; True means any impersonate target is sufficient * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -336,6 +340,7 @@ class InfoExtractor: * "name": Name or description of the subtitles * "http_headers": A dictionary of additional HTTP headers to add to the request. + * "impersonate": Impersonate target(s); same as the "formats" field "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated captions instead of normal subtitles @@ -392,6 +397,8 @@ class InfoExtractor: chapters: A list of dictionaries, with the following entries: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds + (optional: core code can determine this value from + the next chapter's start_time or the video's duration) * "title" (optional, string) heatmap: A list of dictionaries, with the following entries: * "start_time" - The start time of the data point in seconds @@ -406,7 +413,8 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" - _old_archive_ids: A list of old archive ids needed for backward compatibility + _old_archive_ids: A list of old archive ids needed for backward + compatibility. Use yt_dlp.utils.make_archive_id to generate ids _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. The function @@ -884,26 +892,17 @@ class InfoExtractor: extensions = {} - if impersonate in (True, ''): - impersonate = ImpersonateTarget() - requested_targets = [ - t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) - for t in variadic(impersonate) - ] if impersonate else [] - - available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None) + available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate) if available_target: extensions['impersonate'] = available_target elif requested_targets: - message = 'The extractor is attempting impersonation, but ' - message += ( - 'no impersonate target is available' if not str(impersonate) - else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"') - info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation ' - 'for information on installing the required dependencies') + msg = 'The extractor is attempting impersonation' if require_impersonation: - raise ExtractorError(f'{message}; {info_msg}', expected=True) - self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) + raise ExtractorError( + self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True), + expected=True) + self.report_warning( + self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True) try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) @@ -1783,6 +1782,59 @@ class InfoExtractor: r']+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', video_id, end_pattern='', fatal=fatal, default=default, **kw) + def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): + """Parses Next.js app router flight data that was introduced in Next.js v13""" + nextjs_data = {} + if not fatal and not isinstance(webpage, str): + return nextjs_data + + def flatten(flight_data): + if not isinstance(flight_data, list): + return + if len(flight_data) == 4 and flight_data[0] == '$': + _, name, _, data = flight_data + if not isinstance(data, dict): + return + children = data.pop('children', None) + if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name): + # It is useful hydration JSON data + nextjs_data[name[2:]] = data + flatten(children) + return + for f in flight_data: + flatten(f) + + flight_text = '' + # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189 + for flight_segment in re.findall(r']*>self\.__next_f\.push\((\[.+?\])\)', webpage): + segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False) + # Some earlier versions of next.js "optimized" away this array structure; this is unsupported + # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761 + if not isinstance(segment, list) or len(segment) != 2: + self.write_debug( + f'{video_id}: Unsupported next.js flight data structure detected', only_once=True) + continue + # Only use the relevant payload type (1 == data) + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14 + payload_type, chunk = segment + if payload_type == 1: + flight_text += chunk + + for f in flight_text.splitlines(): + prefix, _, body = f.lstrip().partition(':') + if not re.fullmatch(r'[0-9a-f]+', prefix): + continue + # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal + if body.startswith('[') and body.endswith(']'): + flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + elif body.startswith('{') and body.endswith('}'): + data = self._parse_json(body, video_id, fatal=False, errnote=False) + if data is not None: + nextjs_data[prefix] = data + + return nextjs_data + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py index 0c84cfdab7..318fa8943b 100644 --- a/yt_dlp/extractor/condenast.py +++ b/yt_dlp/extractor/condenast.py @@ -96,6 +96,24 @@ class CondeNastIE(InfoExtractor): 'upload_date': '20150916', 'timestamp': 1442434920, }, + }, { + # FIXME: Subtitles + 'url': 'https://www.vanityfair.com/video/watch/vf-quiz-show-squid-game-s3', + 'info_dict': { + 'id': '6862f999c1afbc5ff06b4803', + 'ext': 'mp4', + 'title': '\'Squid Game\' Cast Tests How Well They Know Each Other', + 'categories': ['Arts & Culture', 'Hollywood'], + 'description': 'md5:7a9c668a1fc87648e77da13842ec1534', + 'duration': 955, + 'season': 'Season 1', + 'series': 'Quizzing Each Other', + 'tags': 'count:2', + 'thumbnail': r're:https?://dwgyu36up6iuz\.cloudfront\.net/.+\.jpg', + 'timestamp': 1751341306, + 'upload_date': '20250701', + 'uploader': 'vanityfair', + }, }, { 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'only_matching': True, diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index abd3322a95..29bbc2fe6a 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -8,7 +8,6 @@ from ..utils import ( class CrooksAndLiarsIE(InfoExtractor): _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P[A-Za-z0-9]+)' _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1'] - _TESTS = [{ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'info_dict': { @@ -16,7 +15,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', 'timestamp': 1428207000, 'upload_date': '20150405', 'uploader': 'Heather', @@ -26,6 +25,20 @@ class CrooksAndLiarsIE(InfoExtractor): 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'duration': 236, + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py deleted file mode 100644 index a41dab11b1..0000000000 --- a/yt_dlp/extractor/ctv.py +++ /dev/null @@ -1,49 +0,0 @@ -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, # noqa: UP031 - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 540676ac0f..3058a0f7b5 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -19,11 +19,22 @@ class DailyMailIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', }, }, { 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.daily-news.gr/lifestyle/%ce%b7-%cf%84%cf%81%ce%b1%ce%b3%ce%bf%cf%85%ce%b4%ce%af%cf%83%cf%84%cf%81%ce%b9%ce%b1-jessie-j-%ce%bc%ce%bf%ce%b9%cf%81%ce%ac%cf%83%cf%84%ce%b7%ce%ba%ce%b5-%cf%83%cf%85%ce%b3%ce%ba%ce%bb%ce%bf%ce%bd/', + 'info_dict': { + 'id': '3463585', + 'ext': 'mp4', + 'title': 'Jessie J reveals she has undergone surgery as she shares clips', + 'description': 'md5:9fa9a25feca5b656b0b4a39c922fad1e', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index a81f0a26dd..d27a027702 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -119,13 +119,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): _EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P{_VALID_URL[5:]})'] _TESTS = [{ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', - 'md5': '074b95bdee76b9e3654137aee9c79dfe', 'info_dict': { 'id': 'x5kesuj', 'ext': 'mp4', 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', 'duration': 187, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1493651285, 'upload_date': '20170501', 'uploader': 'Deadline', @@ -133,18 +134,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080', }, }, { 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', - 'md5': 'e2f9717c6604773f963f069ca53a07f8', 'info_dict': { 'id': 'x89eyek', 'ext': 'mp4', - 'title': "En quête d'esprit du 27/03/2022", + 'title': 'En quête d\'esprit du 27/03/2022', 'description': 'md5:66542b9f4df2eb23f314fc097488e553', 'duration': 2756, + 'tags': 'count:1', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1648383669, 'upload_date': '20220327', 'uploader': 'CNEWS', @@ -152,8 +152,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['en_quete_d_esprit'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080', }, }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', @@ -163,8 +161,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 74, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1425657362, 'upload_date': '20150306', 'uploader': 'IGN', @@ -183,10 +181,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader': 'Katy Perry', 'upload_date': '20130905', }, - 'params': { - 'skip_download': True, - }, - 'skip': 'VEVO is only available in some countries', + 'skip': 'Invalid URL', }, { # age-restricted video 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', @@ -259,9 +254,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'x2vtgmm', 'age_limit': 0, 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'view_count': int, 'like_count': int, - 'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080', }, }, { # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj @@ -276,18 +271,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'info_dict': { 'id': 'x8u4owg', 'ext': 'mp4', + 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', 'like_count': int, 'uploader': 'Le Parisien', - 'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg', 'upload_date': '20240309', 'view_count': int, + 'tags': 'count:7', + 'thumbnail': r're:https?://www\.leparisien\.fr/.+\.jpg', 'timestamp': 1709997866, 'age_limit': 0, 'uploader_id': 'x32f7b', 'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes', 'duration': 428.0, - 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', - 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], }, }, { # https://geo.dailymotion.com/player/xry80.html?video=x8vu47w @@ -297,9 +292,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'like_count': int, 'uploader': 'Metatube', - 'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080', 'upload_date': '20240326', 'view_count': int, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1711496732, 'age_limit': 0, 'uploader_id': 'x2xpy74', @@ -308,6 +303,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'description': 'Que lindura', 'tags': [], }, + 'skip': 'Invalid URL', }, { # //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM 'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819', @@ -322,11 +318,30 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'like_count': int, 'age_limit': 0, 'duration': 3220, - 'thumbnail': 'https://s1.dmcdn.net/v/Xvumk1djJBUZfjj2a/x1080', 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1739919947, 'upload_date': '20250218', }, + 'skip': 'Invalid URL', + }, { + 'url': 'https://forum.ionicframework.com/t/ionic-2-jw-player-dailymotion-player/83248', + 'info_dict': { + 'id': 'xwr14q', + 'ext': 'mp4', + 'title': 'Macklemore & Ryan Lewis - Thrift Shop (feat. Wanz)', + 'age_limit': 0, + 'description': 'md5:47fbe168b5a6ddc4a205e20dd6c841b2', + 'duration': 234, + 'like_count': int, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', + 'timestamp': 1358177670, + 'upload_date': '20130114', + 'uploader': 'Macklemore Official', + 'uploader_id': 'x19qlwr', + 'view_count': int, + }, }] _GEO_BYPASS = False _COMMON_MEDIA_FIELDS = '''description @@ -540,7 +555,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE): 'id': 'king of turtles', 'title': 'king of turtles', }, - 'playlist_mincount': 90, + 'playlist_mincount': 0, }] _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } ' @@ -584,7 +599,7 @@ class DailymotionUserIE(DailymotionPlaylistBaseIE): 'info_dict': { 'id': 'nqtv', }, - 'playlist_mincount': 152, + 'playlist_mincount': 148, }, { 'url': 'http://www.dailymotion.com/user/UnderProject', 'info_dict': { diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index f7b243234a..3b0dc1f607 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ from ..utils.traversal import traverse_obj class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). ' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ class DangalPlayBaseIE(InfoExtractor): f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ class DangalPlayIE(DangalPlayBaseIE): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ class DangalPlayIE(DangalPlayBaseIE): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 795fbacc41..ff93c642d2 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -12,13 +12,13 @@ class DBTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', - 'thumbnail': r're:https?://.*\.jpg', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20160916', 'duration': 69, 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 'uploader': 'Dagbladet', }, - 'add_ie': ['Youtube'], + 'skip': 'Invalid URL', }, { 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, @@ -26,6 +26,20 @@ class DBTVIE(InfoExtractor): 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://www.dagbladet.no/nyheter/rekordstort-russisk-angrep/83325693', + 'info_dict': { + 'id': '1HW7fYry', + 'ext': 'mp4', + 'title': 'Putin taler - så skjer dette', + 'description': 'md5:3e8bacee33de861a9663d9a3fcc54e5e', + 'display_id': 'putin-taler-sa-skjer-dette', + 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+', + 'timestamp': 1751043600, + 'upload_date': '20250627', + }, + }] def _real_extract(self, url): display_id, video_id = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c0590..0000000000 --- a/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - ]+ - src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - ]+ - class=(?P["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': f'http-{format_id}', - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } - - -class ClipYouEmbedIE(InfoExtractor): - _VALID_URL = False - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search( - r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) - if mobj is not None: - yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 6f3f60ff43..ba68d5324d 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -64,14 +64,12 @@ class ERTFlixCodenameIE(ERTFlixBaseIE): _VALID_URL = r'ertflix:(?P[\w-]+)' _TESTS = [{ 'url': 'ertflix:monogramma-praxitelis-tzanoylinos', - 'md5': '5b9c2cd171f09126167e4082fc1dd0ef', 'info_dict': { 'id': 'monogramma-praxitelis-tzanoylinos', 'ext': 'mp4', - 'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e', + 'title': 'monogramma-praxitelis-tzanoylinos', }, - }, - ] + }] def _extract_formats_and_subs(self, video_id): media_info = self._call_api(video_id, codename=video_id) @@ -131,13 +129,14 @@ class ERTFlixIE(ERTFlixBaseIE): 'duration': 3166, 'age_limit': 8, }, + 'skip': 'Invalid URL', }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, 'playlist_mincount': 64, }, { @@ -145,28 +144,28 @@ class ERTFlixIE(ERTFlixBaseIE): 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_count': 22, + 'playlist_mincount': 66, }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_mincount': 36, + 'playlist_mincount': 25, }, { 'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9', 'info_dict': { 'id': 'ser.164991', 'age_limit': 8, - 'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.', - 'title': 'Το δίκτυο', + 'title': 'The Network', + 'description': 'The first Greek show featuring topics exclusively around the internet.', }, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }, { 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari', 'only_matching': True, @@ -282,6 +281,16 @@ class ERTWebtvEmbedIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg', }, + 'skip': 'Invalid URL', + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/', + 'info_dict': { + 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'ext': 'mp4', + 'title': 'VOD - 2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'thumbnail': r're:https?://www\.ert\.gr/themata/photos/.+\.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 24ecb03505..2c35013faa 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -81,13 +81,14 @@ class FacebookIE(InfoExtractor): 'description': 'md5:34675bda53336b1d16400265c2bb9b3b', 'uploader': 'RADIO KICKS FM', 'upload_date': '20230818', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1692346159, - 'thumbnail': r're:^https?://.*', 'uploader_id': '100063551323670', 'duration': 3133.583, 'view_count': int, 'concurrent_view_count': 0, }, + 'expected_warnings': ['Cannot parse data'], }, { 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'md5': '6a40d33c0eccbb1af76cf0485a052659', @@ -106,17 +107,18 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Asif', + 'title': '119 reactions · 1.4K shares | Asif Nawab Butt on Reels', 'description': '', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1399398998, - 'thumbnail': r're:^https?://.*', - 'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl', + 'uploader_id': 'pfbid028xue38TBXRyNbiqBSV2LFs3QK3yopvKjupbqFoL6U9SKbx4p2SMdJjQSBvnjsHGWl', 'duration': 131.03, 'concurrent_view_count': int, 'view_count': int, }, + 'expected_warnings': ['Cannot parse data'], }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', @@ -158,7 +160,7 @@ class FacebookIE(InfoExtractor): 'id': '10153664894881749', 'ext': 'mp4', 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1456259628, 'upload_date': '20160223', 'uploader': 'Barack Obama', @@ -168,7 +170,7 @@ class FacebookIE(InfoExtractor): # have 1080P, but only up to 720p in swf params # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '1659aa21fb3dd1585874f668e81a72c8', + 'md5': '70b82ebf5f0e9b91b2a49d3db3563611', 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', @@ -177,7 +179,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'uploader_id': '100059479812265', 'concurrent_view_count': int, @@ -198,13 +200,11 @@ class FacebookIE(InfoExtractor): 'uploader': 'Yaroslav Korpan', 'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl', 'concurrent_view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'duration': 11736.446, }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Invalid URL', }, { # FIXME: Cannot parse data error 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', @@ -215,7 +215,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', }, 'skip': 'Requires logging in', }, { @@ -244,9 +244,10 @@ class FacebookIE(InfoExtractor): 'upload_date': '20171124', 'uploader': 'Vickie Gentry', 'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'duration': 148.224, }, + 'skip': 'Invalid URL', }, { # data.node.comet_sections.content.story.attachments[].styles.attachment.media 'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl', @@ -260,7 +261,7 @@ class FacebookIE(InfoExtractor): 'duration': 132.675, 'uploader_id': '100064451419378', 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1701975646, }, }, { @@ -271,9 +272,9 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lela Evans', 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Lela Evans', - 'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl', + 'uploader_id': 'pfbid02wjMpknobSMnyynK3TNKN4Ww1StcpAKXgowqTyge3bz7LwHZMQ68uiXzzbu7xeryBl', 'upload_date': '20231228', 'timestamp': 1703804085, 'duration': 394.347, @@ -326,28 +327,27 @@ class FacebookIE(InfoExtractor): 'uploader_id': '100066514874195', 'duration': 4524.001, 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, }, { # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'info_dict': { 'id': '106560053808006', 'ext': 'mp4', - 'title': 'Josef', - 'thumbnail': r're:^https?://.*', + 'title': 'Josef Novak on Reels', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, - 'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl', + 'uploader_id': 'pfbid0cjYJYXpePWqhZ9DgpB6gKXrN2q3obwducdKm4wT7K5nkhbfKg5cneocYbsdaji7fl', 'timestamp': 1549275572, 'duration': 3.283, 'uploader': 'Josef Novak', 'description': '', 'upload_date': '20190204', }, + 'expected_warnings': ['Cannot parse data'], }, { # data.video.story.attachments[].media 'url': 'https://www.facebook.com/watch/?v=647537299265662', @@ -406,7 +406,7 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"', 'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Comitato Liberi Pensatori', 'uploader_id': '100065709540881', }, @@ -414,6 +414,56 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + #