From 943083edcd3df45aaa597a6967bc6c95b720f54c Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:26:33 -0400 Subject: [PATCH 001/182] [ie/adobepass] Fix Philo MSO authentication (#13335) Closes #2603 Authored by: Sipherdrakon --- yt_dlp/extractor/adobepass.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 91c40b32e..8c2d9d934 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1574,18 +1574,29 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Philo': # Philo has very unique authentication method - self._download_webpage( - 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({ + self._request_webpage( + 'https://idp.philo.com/auth/init/login_code', video_id, + 'Requesting Philo auth code', data=json.dumps({ 'ident': username, 'device': 'web', 'send_confirm_link': False, 'send_token': True, - })) + 'device_ident': f'web-{uuid.uuid4().hex}', + 'include_login_link': True, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + philo_code = getpass.getpass('Type auth code you have received [Return]: ') - self._download_webpage( - 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code, - })) + self._request_webpage( + 'https://idp.philo.com/auth/update/login_code', video_id, + 'Submitting token', data=json.dumps({'token': philo_code}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Verizon': From 85c8a405e3651dc041b758f4744d4fb3c4c55e01 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:09:47 -0500 Subject: [PATCH 002/182] [ie] Improve JSON LD thumbnails extraction (#13368) Authored by: bashonly, doe1080 Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- test/test_InfoExtractor.py | 14 ++++++++++++++ yt_dlp/extractor/common.py | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c6ff6209a..bc89b2955 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -314,6 +314,20 @@ class TestInfoExtractor(unittest.TestCase): }, {}, ), + ( + # test thumbnail_url key without URL scheme + r''' +''', + { + 'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}], + }, + {}, + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d5607296d..1174bd4f5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1675,9 +1675,9 @@ class InfoExtractor: 'ext': mimetype2ext(e.get('encodingFormat')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnails': [{'url': unescapeHTML(url)} - for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) - if url_or_none(url)], + 'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), { + 'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}), + })), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), # author can be an instance of 'Organization' or 'Person' types. From 148a1eb4c59e127965396c7a6e6acf1979de459e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:18:24 -0500 Subject: [PATCH 003/182] [ie/odnoklassniki] Detect and raise when login is required (#13361) Closes #13360 Authored by: bashonly --- yt_dlp/extractor/odnoklassniki.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index d27d1c3f0..18eba42e6 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -273,6 +273,8 @@ class OdnoklassnikiIE(InfoExtractor): return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'})) elif error: raise ExtractorError(error, expected=True) + elif '>Access to this video is restricted' in webpage: + self.raise_login_required() player = self._parse_json( unescapeHTML(self._search_regex( @@ -429,7 +431,7 @@ class OdnoklassnikiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - f'http://m.ok.ru/video/{video_id}', video_id, + f'https://m.ok.ru/video/{video_id}', video_id, note='Downloading mobile webpage') error = self._search_regex( From c723c4e5e78263df178dbe69844a3d05f3ef9e35 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:20:29 -0500 Subject: [PATCH 004/182] [ie/vimeo] Extract subtitles from player subdomain (#13350) Closes #12198 Authored by: bashonly --- yt_dlp/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 09497b699..b268fad56 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -236,7 +236,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): for tt in (request.get('text_tracks') or []): subtitles.setdefault(tt['lang'], []).append({ 'ext': 'vtt', - 'url': urljoin('https://vimeo.com', tt['url']), + 'url': urljoin('https://player.vimeo.com/', tt['url']), }) thumbnails = [] From e1b6062f8c4a3fa33c65269d48d09ec78de765a2 Mon Sep 17 00:00:00 2001 From: barsnick Date: Tue, 3 Jun 2025 04:29:03 +0200 Subject: [PATCH 005/182] [ie/svt:play] Fix extractor (#13329) Closes #13312 Authored by: barsnick, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/svt.py | 134 +++++++++++--------------------- 2 files changed, 44 insertions(+), 91 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b0c52e0fc..34c98b537 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2017,7 +2017,6 @@ from .sverigesradio import ( SverigesRadioPublicationIE, ) from .svt import ( - SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 6a72f8d42..a48d7858d 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -6,10 +6,13 @@ from ..utils import ( determine_ext, dict_get, int_or_none, - traverse_obj, try_get, unified_timestamp, ) +from ..utils.traversal import ( + require, + traverse_obj, +) class SVTBaseIE(InfoExtractor): @@ -97,40 +100,8 @@ class SVTBaseIE(InfoExtractor): } -class SVTIE(SVTBaseIE): - _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P\d+)&.*?\barticleId=(?P\d+)' - _EMBED_REGEX = [rf'(?: