[ie/nfl] Fix extractors (#11409)

Authored by: bashonly
10 months ago · 838f4385de
parent d135823137
commit 838f4385de
2 changed files with 88 additions and 110 deletions
--- a/yt_dlp/extractor/anvato.py
+++ b/yt_dlp/extractor/anvato.py
@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'  # from anvplayer.min.js
    _TESTS = [{
        # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
        'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
        'md5': '921919dab3cd0b849ff3d624831ae3e2',
        'info_dict': {
            'id': '899441',
            'ext': 'mp4',
            'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
            'description': 'md5:85e05a3cc163f8c344340f220521136d',
            'upload_date': '20201215',
            'timestamp': 1608009755,
            'thumbnail': r're:^https?://.*\.jpg',
            'uploader': 'NFL',
            'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
                     'Player Highlights', 'Cleveland Browns', 'league'],
            'duration': 157,
            'categories': ['Entertainment', 'Game', 'Highlights'],
        },
    }, {
        # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
        'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
        'md5': '837718bcfb3a7778d022f857f7a9b19e',
@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
    }
    def _generate_nfl_token(self, anvack, mcp_id):
        """Fetch a media token for an NFL video from the NFL API.

        Two requests are made, both using ``mcp_id`` as the video ID passed
        to ``_download_json``:

        1. A client-credentials request to ``/v1/reroute`` to obtain an
           access token (and, optionally, its token type).
        2. A query to ``/v3/shield/`` asking for ``viewer.mediaToken`` for
           the given ``anvack`` and ``mcp_id``, authorized with that token.

        @param anvack  access key interpolated into the ``mediaToken`` query
        @param mcp_id  media ID interpolated into the ``mediaToken`` query
        @returns       the value found at ``data.viewer.mediaToken.token``
                       in the second response, as resolved by ``traverse_obj``
        """
        reroute = self._download_json(
            'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
            headers={'X-Domain-Id': 100}, note='Fetching token info')
        # Fall back to 'Bearer' when the response has no (or an empty) token_type
        token_type = reroute.get('token_type') or 'Bearer'
        # 'access_token' is subscripted directly, so it must be present in the response
        auth_token = f'{token_type} {reroute["access_token"]}'
        # The query text is built with %-formatting: anvack is quoted, mcp_id is inserted bare
        response = self._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
  viewer {
    mediaToken(anvack: "%s", id: %s) {
      token
    }
  }
 }''' % (anvack, mcp_id),  # noqa: UP031
            }).encode(), headers={
                'Authorization': auth_token,
                'Content-Type': 'application/json',
            }, note='Fetching NFL API token')
        return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
    # Maps an Anvato access key to the function that generates its token.
    # The values are plain functions stored at class-definition time, so the
    # caller invokes them as `generator(self, access_key, video_id)` and uses
    # the result as the 'anvstk2' API parameter.
    _TOKEN_GENERATORS = {
        'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
    }
    def _server_time(self, access_key, video_id):
        return int_or_none(traverse_obj(self._download_json(
            f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
        }
        if extracted_token is not None:
            api['anvstk2'] = extracted_token
        elif self._TOKEN_GENERATORS.get(access_key) is not None:
            api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
        elif self._ANVACK_TABLE.get(access_key) is not None:
            api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
        else:
--- a/yt_dlp/extractor/nfl.py
+++ b/yt_dlp/extractor/nfl.py
@ -11,9 +11,12 @@ from ..utils import (
    clean_html,
    determine_ext,
    get_element_by_class,
-    traverse_obj,
+    int_or_none,
    make_archive_id,
    url_or_none,
    urlencode_postdata,
 )
 from ..utils.traversal import traverse_obj
 class NFLBaseIE(InfoExtractor):
@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
            'osVersion': '10.0',
        }, separators=(',', ':')).encode()).decode(),
        'networkType': 'other',
-        'nflClaimGroupsToAdd': [],
+        'peacockUUID': 'undefined',
        'nflClaimGroupsToRemove': [],
    }
    _ACCOUNT_INFO = {}
-    _API_KEY = None
+    _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
    _TOKEN = None
    _TOKEN_EXPIRY = 0
-    def _get_account_info(self, url, slug):
+    def _get_account_info(self):
        if not self._API_KEY:
            webpage = self._download_webpage(url, slug, fatal=False) or ''
            self._API_KEY = self._search_regex(
                r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
                fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
        cookies = self._get_cookies('https://auth-id.nfl.com/')
        login_token = traverse_obj(cookies, (
            (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
                'or else try using --cookies-from-browser instead', expected=True)
        account = self._download_json(
-            'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
+            'https://auth-id.nfl.com/accounts.getAccountInfo', None,
            note='Downloading account info', data=urlencode_postdata({
                'include': 'profile,data',
                'lang': 'en',
@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
                'sdk': 'js_latest',
                'login_token': login_token,
                'authMode': 'cookie',
-                'pageURL': url,
+                'pageURL': 'https://www.nfl.com/',
                'sdkBuild': traverse_obj(cookies, (
                    'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
                'format': 'json',
@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor):
        if len(self._ACCOUNT_INFO) != 3:
            raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
-    def _get_auth_token(self, url, slug):
+    def _get_auth_token(self):
        if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
            return
        if not self._ACCOUNT_INFO:
            self._get_account_info(url, slug)
        token = self._download_json(
            'https://api.nfl.com/identity/v3/token%s' % (
                '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
-            slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
+            None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
            data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
        self._TOKEN = token['accessToken']
        self._TOKEN_EXPIRY = token['expiresIn']
        self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
    def _extract_video(self, mcp_id, is_live=False):
        """Build an info dict for an NFL video identified by its MCP ID.

        Calls ``self._get_auth_token()`` first, then requests the asset from
        ``https://api.nfl.com/play/v1/asset/{mcp_id}`` using ``self._TOKEN``
        as a Bearer token, and extracts HLS formats and subtitles from the
        ``accessUrl`` of the response.

        @param mcp_id   media ID; used in the asset URL and as the result 'id'
        @param is_live  sent as 'live' in the request body and returned as
                        'is_live' in the result
        @returns        dict with 'id', 'formats', 'subtitles', 'is_live',
                        '_old_archive_ids' and whichever of 'title',
                        'description', 'duration', 'thumbnails' could be read
                        from the response's 'metadata'
        """
        self._get_auth_token()
        data = self._download_json(
            f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
                'Authorization': f'Bearer {self._TOKEN}',
                'Accept': 'application/json',
                'Content-Type': 'application/json',
            }, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
        # 'accessUrl' is subscripted directly, so it must be present in the response
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
        return {
            'id': mcp_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
            # Archive ID built for AnvatoIE with the same media ID
            # NOTE(review): presumably so entries previously archived via AnvatoIE are still recognized - confirm
            '_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
            **traverse_obj(data, ('metadata', {
                # Title is taken from 'def_title' or 'friendlyName' under 'event'
                'title': ('event', ('def_title', 'friendlyName'), {str}, any),
                'description': ('event', 'def_description', {str}),
                'duration': ('event', 'duration', {int_or_none}),
                'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
            })),
        }
    def _parse_video_config(self, video_config, display_id):
        video_config = self._parse_json(video_config, display_id)
        is_live = traverse_obj(video_config, ('live', {bool})) or False
        item = video_config['playlist'][0]
-        mcp_id = item.get('mcpID')
+        if mcp_id := item.get('mcpID'):
-        if mcp_id:
+            return self._extract_video(mcp_id, is_live=is_live)
-            info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
+
        info = {'id': item.get('id') or item['entityId']}
        item_url = item['url']
        ext = determine_ext(item_url)
        if ext == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
        else:
-            media_id = item.get('id') or item['entityId']
+            info['url'] = item_url
-            title = item.get('title')
+            if item.get('audio') is True:
-            item_url = item['url']
+                info['vcodec'] = 'none'
-            info = {'id': media_id}
+
-            ext = determine_ext(item_url)
+        thumbnails = None
-            if ext == 'm3u8':
+        if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
-                info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
+            thumbnails = [{
-            else:
+                'url': image_url,
-                info['url'] = item_url
+                'ext': determine_ext(image_url, 'jpg'),
-                if item.get('audio') is True:
+            }]
-                    info['vcodec'] = 'none'
+
-            is_live = video_config.get('live') is True
+        info.update({
-            thumbnails = None
+            **traverse_obj(item, {
-            image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
+                'title': ('title', {str}),
-            if image_url:
+                'description': ('description', {clean_html}),
-                thumbnails = [{
+            }),
-                    'url': image_url,
+            'is_live': is_live,
-                    'ext': determine_ext(image_url, 'jpg'),
+            'thumbnails': thumbnails,
-                }]
+        })
            info.update({
                'title': title,
                'is_live': is_live,
                'description': clean_html(item.get('description')),
                'thumbnails': thumbnails,
            })
        return info
@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
            'ext': 'mp4',
            'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
            'description': 'md5:85e05a3cc163f8c344340f220521136d',
-            'upload_date': '20201215',
+            'thumbnail': r're:https?://.+\.jpg',
            'timestamp': 1608009755,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'NFL',
            'tags': 'count:6',
            'duration': 157,
-            'categories': 'count:3',
+            '_old_archive_ids': ['anvato 899441'],
        },
    }, {
        'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
-        'md5': '6886b32c24b463038c760ceb55a34566',
+        'md5': '92a517f05bd3eb50fe50244bc621aec8',
        'info_dict': {
-            'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
+            'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
            'ext': 'mp3',
            'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
            'description': 'md5:12ada8ee70e6762658c30e223e095075',
            'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
        'only_matching': True,
@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
-        entries = []
+
-        for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
+        def entries():
-            entries.append(self._parse_video_config(video_config, display_id))
+            for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
                yield self._parse_video_config(video_config, display_id)
        title = clean_html(get_element_by_class(
            'nfl-c-article__title', webpage)) or self._html_search_meta(
            ['og:title', 'twitter:title'], webpage)
-        return self.playlist_result(entries, display_id, title)
+
        return self.playlist_result(entries(), display_id, title)
 class NFLPlusReplayIE(NFLBaseIE):
@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
        'all_22': 'All-22',
    }
    def _real_initialize(self):
        """Load NFL account info by calling ``self._get_account_info()``."""
        # NOTE(review): presumably run once before extraction so that missing/invalid cookies fail early - confirm
        self._get_account_info()
    def _real_extract(self, url):
        slug, video_id = self._match_valid_url(url).group('slug', 'id')
        requested_types = self._configuration_arg('type', ['all'])
@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
        requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
        if not video_id:
-            self._get_auth_token(url, slug)
+            self._get_auth_token()
            headers = {'Authorization': f'Bearer {self._TOKEN}'}
            game_id = self._download_json(
                f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
                    'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
        if video_id:
-            return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+            return self._extract_video(video_id)
        def entries():
            for replay in traverse_obj(
                replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
            ):
-                video_id = replay['mcpPlaybackId']
+                yield self._extract_video(replay['mcpPlaybackId'])
                yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
        return self.playlist_result(entries(), slug)
@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
        'params': {'skip_download': 'm3u8'},
    }]
    def _real_initialize(self):
        """Load NFL account info by calling ``self._get_account_info()``."""
        # NOTE(review): presumably run once before extraction so that missing/invalid cookies fail early - confirm
        self._get_account_info()
    def _real_extract(self, url):
        slug = self._match_id(url)
-        self._get_auth_token(url, slug)
+        self._get_auth_token()
        video_id = self._download_json(
            f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
                'Authorization': f'Bearer {self._TOKEN}',
            })['mcpPlaybackId']
-        return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+        return self._extract_video(video_id)