From 2ecc5fafbcf935183f469f7a248c9198a8634d00 Mon Sep 17 00:00:00 2001
From: ArabCoders <admin@arabcoders.org>
Date: Thu, 20 Mar 2025 20:26:09 +0300
Subject: [PATCH] [Tver] handle multi-streams from streaks.jp, and move
 broadcast label to its own method to revise later.

---
 yt_dlp/extractor/tver.py | 356 +++++++++++++++++++++++++--------------
 1 file changed, 231 insertions(+), 125 deletions(-)
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index 104314f814..9ddc152a65 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -1,8 +1,9 @@
 import datetime
+import json
 import re
 
-from .common import InfoExtractor
-from ..utils import (
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
     ExtractorError,
     float_or_none,
     join_nonempty,
@@ -16,131 +17,193 @@ from ..utils import (
 
 class TVerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
-    _TESTS = [{
-        'skip': 'videos are only available for 7 days',
-        'url': 'https://tver.jp/episodes/ep83nf3w4p',
-        'info_dict': {
-            'title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
-            'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
-            'series': '家事ヤロウ!!!',
-            'episode': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
-            'alt_title': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
-            'channel': 'テレビ朝日',
-            'id': 'ep83nf3w4p',
-            'ext': 'mp4',
+    _TESTS = [
+        {
+            'skip': 'videos are only available for 7 days',
+            'url': 'https://tver.jp/episodes/ep83nf3w4p',
+            'info_dict': {
+                'title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
+                'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
+                'series': '家事ヤロウ!!!',
+                'episode': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
+                'alt_title': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
+                'channel': 'テレビ朝日',
+                'id': 'ep83nf3w4p',
+                'ext': 'mp4',
+            },
         },
-    }, {
-        'url': 'https://tver.jp/corner/f0103888',
-        'only_matching': True,
-    }, {
-        'url': 'https://tver.jp/lp/f0033031',
-        'only_matching': True,
-    }, {
-        'url': 'https://tver.jp/series/srtxft431v',
-        'info_dict': {
-            'id': 'srtxft431v',
-            'title': '名探偵コナン',
+        {
+            'url': 'https://tver.jp/corner/f0103888',
+            'only_matching': True,
         },
-        'playlist': [
-            {
-                'md5': '779ffd97493ed59b0a6277ea726b389e',
-                'info_dict': {
-                    'id': 'ref:conan-1137-241005',
-                    'ext': 'mp4',
-                    'title': '名探偵コナン #1137「行列店、味変の秘密」',
-                    'uploader_id': '5330942432001',
-                    'tags': [],
-                    'channel': '読売テレビ',
-                    'series': '名探偵コナン',
-                    'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
-                    'episode': '#1137「行列店、味変の秘密」',
-                    'duration': 1469.077,
-                    'timestamp': 1728030405,
-                    'upload_date': '20241004',
-                    'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
-                    'thumbnail': r're:https://.+\.jpg',
+        {
+            'url': 'https://tver.jp/lp/f0033031',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://tver.jp/series/srtxft431v',
+            'info_dict': {
+                'id': 'srtxft431v',
+                'title': '名探偵コナン',
+            },
+            'playlist': [
+                {
+                    'md5': '779ffd97493ed59b0a6277ea726b389e',
+                    'info_dict': {
+                        'id': 'ref:conan-1137-241005',
+                        'ext': 'mp4',
+                        'title': '名探偵コナン #1137「行列店、味変の秘密」',
+                        'uploader_id': '5330942432001',
+                        'tags': [],
+                        'channel': '読売テレビ',
+                        'series': '名探偵コナン',
+                        'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
+                        'episode': '#1137「行列店、味変の秘密」',
+                        'duration': 1469.077,
+                        'timestamp': 1728030405,
+                        'upload_date': '20241004',
+                        'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
+                        'thumbnail': r're:https://.+\.jpg',
+                    },
                 },
-            }],
-    }, {
-        'url': 'https://tver.jp/series/sru35hwdd2',
-        'info_dict': {
-            'id': 'sru35hwdd2',
-            'title': '神回だけ見せます！',
+            ],
+        },
+        {
+            'url': 'https://tver.jp/series/sru35hwdd2',
+            'info_dict': {
+                'id': 'sru35hwdd2',
+                'title': '神回だけ見せます！',
+            },
+            'playlist_count': 11,
+        },
+        {
+            'url': 'https://tver.jp/series/srkq2shp9d',
+            'only_matching': True,
         },
-        'playlist_count': 11,
-    }, {
-        'url': 'https://tver.jp/series/srkq2shp9d',
-        'only_matching': True,
-    }]
+    ]
+
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s?ati=%s'
 
-    _HEADERS = {'x-tver-platform-type': 'web', 'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'}
+    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s'
+
+    _HEADERS = {
+        'x-tver-platform-type': 'web',
+        'origin': 'https://tver.jp/',
+        'referer': 'https://tver.jp/',
+    }
     _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
         session_info = self._download_json(
             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
-            None, 'Creating session', data=b'device_type=pc')
-        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
-            'platform_uid': 'platform_uid',
-            'platform_token': 'platform_token',
-        }))
+            None,
+            'Creating session',
+            data=b'device_type=pc',
+        )
+        self._PLATFORM_QUERY = traverse_obj(
+            session_info,
+            (
+                'result',
+                {
+                    'platform_uid': 'platform_uid',
+                    'platform_token': 'platform_token',
+                },
+            ),
+        )
 
     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
         return self._download_json(
-            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
-            fatal=fatal, headers=self._HEADERS, query={
+            f'https://platform-api.tver.jp/service/api/{path}',
+            video_id,
+            note,
+            fatal=fatal,
+            headers=self._HEADERS,
+            query={
                 **self._PLATFORM_QUERY,
                 **(query or {}),
-            })
+            },
+        )
 
     def _yield_episode_ids_for_series(self, series_id):
         seasons_info = self._download_json(
             f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
-            series_id, 'Downloading seasons info', headers=self._HEADERS)
+            series_id,
+            'Downloading seasons info',
+            headers=self._HEADERS,
+        )
         for season_id in traverse_obj(
-                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
+            seasons_info,
+            ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
+        ):
             episodes_info = self._call_platform_api(
-                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
-            yield from traverse_obj(episodes_info, (
-                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
+                f'v1/callSeasonEpisodes/{season_id}',
+                series_id,
+                f'Downloading season {season_id} episodes info',
+            )
+            yield from traverse_obj(
+                episodes_info,
+                (
+                    'result',
+                    'contents',
+                    lambda _, v: v['type'] == 'episode',
+                    'content',
+                    'id',
+                    {str},
+                ),
+            )
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
 
         if video_type == 'series':
-            series_info = self._call_platform_api(
-                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            series_info = self._call_platform_api(f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
             return self.playlist_from_matches(
-                self._yield_episode_ids_for_series(video_id), video_id,
+                self._yield_episode_ids_for_series(video_id),
+                video_id,
                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
-                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
+                ie=TVerIE,
+                getter=lambda x: f'https://tver.jp/episodes/{x}',
+            )
 
         if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
-            video_id = self._match_id(self._search_regex(
-                (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
-                webpage, 'url regex'))
+            video_id = self._match_id(
+                self._search_regex(
+                    (
+                        r'canonical"\s*href="(https?://tver\.jp/[^"]+)"',
+                        r'&link=(https?://tver\.jp/[^?&]+)[?&]',
+                    ),
+                    webpage,
+                    'url regex',
+                ),
+            )
 
         episode_info = self._call_platform_api(
-            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
+            f'v1/callEpisode/{video_id}',
+            video_id,
+            'Downloading episode info',
+            fatal=False,
+            query={
                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            })
-        episode_content = traverse_obj(
-            episode_info, ('result', 'episode', 'content')) or {}
+            },
+        )
+        episode_content = traverse_obj(episode_info, ('result', 'episode', 'content')) or {}
+
+        self.write_debug(json.dumps(episode_info, indent=2, ensure_ascii=False))
 
         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
 
         video_info = self._download_json(
-            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
-            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
+            f'https://statics.tver.jp/content/episode/{video_id}.json',
+            video_id,
+            'Downloading video info',
+            query={'v': version},
+            headers={'Referer': 'https://tver.jp/'},
+        )
 
         episode = strip_or_none(episode_content.get('title'))
         series = str_or_none(episode_content.get('seriesTitle'))
-        title = (
-            join_nonempty(series, episode, delim=' ')
-            or str_or_none(video_info.get('title')))
+        title = join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
 
@@ -149,7 +212,8 @@ class TVerIE(InfoExtractor):
                 'id': quality,
                 'url': update_url_query(
                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
-                    {'v': version}),
+                    {'v': version},
+                ),
                 'width': width,
                 'height': height,
             }
@@ -180,19 +244,8 @@ class TVerIE(InfoExtractor):
         if episode_number:
             data['episode_number'] = int(episode_number)
 
-        onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
         if onair_label:
-            match = re.search(
-                pattern=r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)',
-                string=onair_label,
-            )
-
-            if match:
-                air_date = match.groupdict()
-                if air_date.get('day') and air_date.get('month'):
-                    data['release_date'] = (
-                        f"{datetime.datetime.now().year}{air_date['month'].zfill(2)}{air_date['day'].zfill(2)}"
-                    )
+            data.update(self._format_broadcast_date(onair_label))
 
         backend = self._configuration_arg('backend', ['streaks'])[0]
 
@@ -200,9 +253,39 @@ class TVerIE(InfoExtractor):
             raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
 
         if backend == 'brightcove':
-            return self._brightcove_backend(data, video_info)
+            data = self._brightcove_backend(data, video_info)
+        else:
+            data = self._streaks_backend(data, video_info, video_id)
+
+        self.write_debug(json.dumps(data, indent=2, ensure_ascii=False))
+
+        return data
+
+    def _format_broadcast_date(self, onair_label):
+        """Turn a broadcast date label into release fields for the info dict.
+
+        Returns a dict holding `release_year` (int) when the label carries a
+        `YYYY年` part, or `release_date` (a YYYYMMDD string built with the
+        current year) when it carries a month/day part; `{}` otherwise.
+        """
+        if not onair_label:
+            return {}
+
+        # The two alternatives are mutually exclusive: one search yields
+        # either the year or the month/day pair, never both.
+        match = re.search(
+            pattern=r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)',
+            string=onair_label,
+        )
+        if not match:
+            return {}
 
-        return self._streaks_backend(data, video_info, video_id)
+        parts = match.groupdict()
+        if parts.get('year'):
+            return {'release_year': int(parts['year'])}
+        # yt-dlp expects release_date as a YYYYMMDD string, so do not cast to int.
+        month, day = parts['month'].zfill(2), parts['day'].zfill(2)
+        return {'release_date': f'{datetime.datetime.now().year}{month}{day}'}
 
     def _brightcove_backend(self, result, video_info):
         self.write_debug('Using Brightcove backend')
@@ -216,12 +299,16 @@ class TVerIE(InfoExtractor):
         if not r_id.isdigit():
             r_id = f'ref:{r_id}'
 
-        result.update({
-            '_type': 'url_transparent',
-            'url': smuggle_url(
-                self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
-            'ie_key': 'BrightcoveNew',
-        })
+        result.update(
+            {
+                '_type': 'url_transparent',
+                'url': smuggle_url(
+                    self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
+                    {'geo_countries': ['JP']},
+                ),
+                'ie_key': 'BrightcoveNew',
+            },
+        )
 
         return result
 
@@ -240,7 +327,7 @@ class TVerIE(InfoExtractor):
         if not ref_id.startswith('ref:'):
             ref_id = f'ref:{ref_id}'
 
-        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id, 'aa')
+        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id)
         self.write_debug(f'Streaks URL: {url}')
 
         json_info = self._download_json(
@@ -254,26 +341,45 @@ class TVerIE(InfoExtractor):
             },
         )
 
-        res = traverse_obj(json_info, ('sources', 0, 'resolution'), default=None)
-        m3u8_url = traverse_obj(json_info, ('sources', 0, 'src'), default=False)
-        if not m3u8_url:
-            raise ExtractorError('Failed to extract m3u8 URL')
+        self.write_debug(json.dumps(json_info, indent=2, ensure_ascii=False))
 
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            m3u8_url,
-            video_id,
-            'mp4',
-            m3u8_id='hls',
-            quality=res,
-            headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
-            note='Downloading streaks.jp m3u8 information',
-        )
+        # Every entry in `sources` may point at its own HLS manifest.
+        sources = traverse_obj(json_info, ('sources'), default=[])
+
+        formats = []
+        # Subtitles are a dict keyed by language, so they must be merged
+        # per language rather than collected in a list.
+        subtitles = {}
+
+        for item in sources:
+            m3u8_url = traverse_obj(item, ('src'), default=None)
+            if not m3u8_url:
+                continue
+
+            self.write_debug(f'M3U8 URL: {m3u8_url}')
+
+            item_formats, item_subtitles = self._extract_m3u8_formats_and_subtitles(
+                m3u8_url,
+                video_id,
+                'mp4',
+                m3u8_id='hls',
+                headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
+                note='Downloading streaks.jp m3u8 information',
+            )
+
+            formats.extend(item_formats)
+            self._merge_subtitles(item_subtitles, target=subtitles)
+
+        if not formats:
+            raise ExtractorError('Failed to extract any m3u8 streams from streaks.jp video info')
 
-        result.update({
-            'id': video_id,
-            'formats': formats,
-            'subtitles': subtitles,
-        })
+        result.update(
+            {
+                'id': video_id,
+                'formats': formats,
+                'subtitles': subtitles,
+            },
+        )
 
         duration = float_or_none(json_info.get('duration'), 1000)
         if duration: