[ie/telecinco] Fix extractor (#13379)

Closes #13378
Authored by: bashonly
4 months ago · 03dba2012d
parent 5d96527be8
commit 03dba2012d
2 changed files with 13 additions and 14 deletions
--- a/yt_dlp/extractor/mitele.py
+++ b/yt_dlp/extractor/mitele.py
@@ -1,7 +1,5 @@
 from .telecinco import TelecincoBaseIE
-from ..networking.exceptions import HTTPError
 from ..utils import (
-    ExtractorError,
    int_or_none,
    parse_iso8601,
 )
@@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):

    def _real_extract(self, url):
        display_id = self._match_id(url)
-
-        try:  # yt-dlp's default user-agents are too old and blocked by akamai
-            webpage = self._download_webpage(url, display_id, headers={
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
-            })
-        except ExtractorError as e:
-            if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
-                raise
-            # Retry with impersonation if hardcoded UA is insufficient to bypass akamai
-            webpage = self._download_webpage(url, display_id, impersonate=True)
-
+        webpage = self._download_akamai_webpage(url, display_id)
        pre_player = self._search_json(
            r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
            webpage, 'Pre Player', display_id)['prePlayer']
--- a/yt_dlp/extractor/telecinco.py
+++ b/yt_dlp/extractor/telecinco.py
@@ -63,6 +63,17 @@ class TelecincoBaseIE(InfoExtractor):
            'http_headers': headers,
        }

+    def _download_akamai_webpage(self, url, display_id):
+        try:  # yt-dlp's default user-agents are too old and blocked by akamai
+            return self._download_webpage(url, display_id, headers={
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
+            })
+        except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
+                raise
+            # Retry with impersonation if hardcoded UA is insufficient to bypass akamai
+            return self._download_webpage(url, display_id, impersonate=True)
+

 class TelecincoIE(TelecincoBaseIE):
    IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
@@ -140,7 +151,7 @@ class TelecincoIE(TelecincoBaseIE):

    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
+        webpage = self._download_akamai_webpage(url, display_id)
        article = self._search_json(
            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
            webpage, 'article', display_id)['article']