[TVer] Handle multiple streams from streaks.jp, and move broadcast-label parsing to its own method to revise later.

pull/12659/head
ArabCoders 4 months ago
parent 3d00a93354
commit 2ecc5fafbc
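The diff below keeps the `backend` extractor argument (default `streaks`, with `brightcove` still accepted), so the playback backend can be chosen per run. A minimal sketch of driving that switch through the embedding API, assuming this patched extractor is the one installed; the episode URL is the test URL from the diff and expires after about a week:

from yt_dlp import YoutubeDL

# 'tver' is TVerIE's ie_key lowercased; the value is read via
# _configuration_arg('backend', ['streaks']) in _real_extract below.
opts = {'extractor_args': {'tver': {'backend': ['brightcove']}}}
with YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://tver.jp/episodes/ep83nf3w4p', download=False)
    print(info.get('id'), info.get('title'))

The CLI spelling of the same selection is --extractor-args "tver:backend=brightcove".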

@@ -1,8 +1,9 @@
 import datetime
+import json
 import re
 
-from .common import InfoExtractor
-from ..utils import (
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
     ExtractorError,
     float_or_none,
     join_nonempty,
@@ -16,7 +17,8 @@ from ..utils import (
 
 class TVerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
-    _TESTS = [{
+    _TESTS = [
+        {
             'skip': 'videos are only available for 7 days',
             'url': 'https://tver.jp/episodes/ep83nf3w4p',
             'info_dict': {
@@ -29,13 +31,16 @@ class TVerIE(InfoExtractor):
                 'id': 'ep83nf3w4p',
                 'ext': 'mp4',
             },
-    }, {
+        },
+        {
             'url': 'https://tver.jp/corner/f0103888',
             'only_matching': True,
-    }, {
+        },
+        {
             'url': 'https://tver.jp/lp/f0033031',
             'only_matching': True,
-    }, {
+        },
+        {
             'url': 'https://tver.jp/series/srtxft431v',
             'info_dict': {
                 'id': 'srtxft431v',
@@ -60,87 +65,145 @@ class TVerIE(InfoExtractor):
                     'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
                     'thumbnail': r're:https://.+\.jpg',
                 },
-        }],
-    }, {
+            },
+        ],
+        },
+        {
             'url': 'https://tver.jp/series/sru35hwdd2',
             'info_dict': {
                 'id': 'sru35hwdd2',
                 'title': '神回だけ見せます!',
             },
             'playlist_count': 11,
-    }, {
+        },
+        {
             'url': 'https://tver.jp/series/srkq2shp9d',
             'only_matching': True,
-    }]
+        },
+    ]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s?ati=%s'
-    _HEADERS = {'x-tver-platform-type': 'web', 'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'}
+    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s'
+    _HEADERS = {
+        'x-tver-platform-type': 'web',
+        'origin': 'https://tver.jp/',
+        'referer': 'https://tver.jp/',
+    }
     _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
         session_info = self._download_json(
             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
-            None, 'Creating session', data=b'device_type=pc')
-        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
+            None,
+            'Creating session',
+            data=b'device_type=pc',
+        )
+        self._PLATFORM_QUERY = traverse_obj(
+            session_info,
+            (
+                'result',
+                {
                     'platform_uid': 'platform_uid',
                     'platform_token': 'platform_token',
-        }))
+                },
+            ),
+        )
 
     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
         return self._download_json(
-            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
-            fatal=fatal, headers=self._HEADERS, query={
+            f'https://platform-api.tver.jp/service/api/{path}',
+            video_id,
+            note,
+            fatal=fatal,
+            headers=self._HEADERS,
+            query={
                 **self._PLATFORM_QUERY,
                 **(query or {}),
-            })
+            },
+        )
 
     def _yield_episode_ids_for_series(self, series_id):
         seasons_info = self._download_json(
             f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
-            series_id, 'Downloading seasons info', headers=self._HEADERS)
+            series_id,
+            'Downloading seasons info',
+            headers=self._HEADERS,
+        )
         for season_id in traverse_obj(
-                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
+            seasons_info,
+            ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
+        ):
             episodes_info = self._call_platform_api(
-                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
-            yield from traverse_obj(episodes_info, (
-                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
+                f'v1/callSeasonEpisodes/{season_id}',
+                series_id,
+                f'Downloading season {season_id} episodes info',
+            )
+            yield from traverse_obj(
+                episodes_info,
+                (
+                    'result',
+                    'contents',
+                    lambda _, v: v['type'] == 'episode',
+                    'content',
+                    'id',
+                    {str},
+                ),
+            )
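The season-to-episode walk above does all of its filtering inside `traverse_obj` paths: the lambda branches over the `contents` list and `{str}` keeps only string ids. A minimal sketch against a hypothetical payload shaped after that path (the real callSeasonEpisodes response is not documented here):

from yt_dlp.utils import traverse_obj

episodes_info = {  # hypothetical payload, shaped only after the path above
    'result': {
        'contents': [
            {'type': 'episode', 'content': {'id': 'ep83nf3w4p'}},
            {'type': 'season', 'content': {'id': 'ignored'}},  # dropped by the lambda
        ],
    },
}
ids = traverse_obj(
    episodes_info,
    ('result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
assert ids == ['ep83nf3w4p']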
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
         if video_type == 'series':
-            series_info = self._call_platform_api(
-                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            series_info = self._call_platform_api(f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
             return self.playlist_from_matches(
-                self._yield_episode_ids_for_series(video_id), video_id,
+                self._yield_episode_ids_for_series(video_id),
+                video_id,
                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
-                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
+                ie=TVerIE,
+                getter=lambda x: f'https://tver.jp/episodes/{x}',
+            )
 
         if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
-            video_id = self._match_id(self._search_regex(
-                (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
-                webpage, 'url regex'))
+            video_id = self._match_id(
+                self._search_regex(
+                    (
+                        r'canonical"\s*href="(https?://tver\.jp/[^"]+)"',
+                        r'&link=(https?://tver\.jp/[^?&]+)[?&]',
+                    ),
+                    webpage,
+                    'url regex',
+                ),
+            )
 
         episode_info = self._call_platform_api(
-            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
+            f'v1/callEpisode/{video_id}',
+            video_id,
+            'Downloading episode info',
+            fatal=False,
+            query={
                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            })
-        episode_content = traverse_obj(
-            episode_info, ('result', 'episode', 'content')) or {}
+            },
+        )
+        episode_content = traverse_obj(episode_info, ('result', 'episode', 'content')) or {}
+        self.write_debug(json.dumps(episode_info, indent=2, ensure_ascii=False))
 
         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
         video_info = self._download_json(
-            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
-            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
+            f'https://statics.tver.jp/content/episode/{video_id}.json',
+            video_id,
+            'Downloading video info',
+            query={'v': version},
+            headers={'Referer': 'https://tver.jp/'},
+        )
 
         episode = strip_or_none(episode_content.get('title'))
         series = str_or_none(episode_content.get('seriesTitle'))
-        title = (
-            join_nonempty(series, episode, delim=' ')
-            or str_or_none(video_info.get('title')))
+        title = join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
@@ -149,7 +212,8 @@ class TVerIE(InfoExtractor):
                 'id': quality,
                 'url': update_url_query(
                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
-                    {'v': version}),
+                    {'v': version},
+                ),
                 'width': width,
                 'height': height,
             }
@@ -180,29 +244,48 @@ class TVerIE(InfoExtractor):
         if episode_number:
             data['episode_number'] = int(episode_number)
 
+        onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
         if onair_label:
+            data.update(self._format_broadcast_date(onair_label))
+
+        backend = self._configuration_arg('backend', ['streaks'])[0]
+        if backend not in ('brightcove', 'streaks'):
+            raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
+
+        if backend == 'brightcove':
+            data = self._brightcove_backend(data, video_info)
+        else:
+            data = self._streaks_backend(data, video_info, video_id)
+
+        self.write_debug(json.dumps(data, indent=2, ensure_ascii=False))
+
+        return data
+
+    def _format_broadcast_date(self, onair_label):
+        if not onair_label:
+            return {}
+
         match = re.search(
             pattern=r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)',
             string=onair_label,
         )
-        if match:
-            air_date = match.groupdict()
-            if air_date.get('day') and air_date.get('month'):
-                data['release_date'] = (
-                    f"{datetime.datetime.now().year}{air_date['month'].zfill(2)}{air_date['day'].zfill(2)}"
-                )
+        if not match:
+            return {}
 
-        backend = self._configuration_arg('backend', ['streaks'])[0]
-        if backend not in ('brightcove', 'streaks'):
-            raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
+        data = {}
+        broadcast_date = match.groupdict()
 
-        if backend == 'brightcove':
-            return self._brightcove_backend(data, video_info)
-        return self._streaks_backend(data, video_info, video_id)
+        if broadcast_date.get('year'):
+            data['release_year'] = int(broadcast_date['year'])
+        if broadcast_date.get('day') and broadcast_date.get('month'):
+            data['release_date'] = int(
+                f"{datetime.datetime.now().year}{broadcast_date['month'].zfill(2)}{broadcast_date['day'].zfill(2)}",
+            )
+
+        return data
 
     def _brightcove_backend(self, result, video_info):
         self.write_debug('Using Brightcove backend')
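`_format_broadcast_date` sees labels like the '10月5日(土)放送分' suffix in the test data above: month/day labels take the current year, while bare four-digit labels only set `release_year`. A quick standalone check of the month/day branch, using that label from the test data:

import datetime
import re

onair_label = '10月5日(土)放送分'  # broadcast-date label from the test data above
match = re.search(
    r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)',
    onair_label)
groups = match.groupdict()
release_date = int(
    f"{datetime.datetime.now().year}{groups['month'].zfill(2)}{groups['day'].zfill(2)}")
print(release_date)  # e.g. 20251005 when run in 2025

Note that the current-year assumption misdates December broadcasts extracted in January, which is presumably part of what the commit message defers to a later revision.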
@@ -216,12 +299,16 @@ class TVerIE(InfoExtractor):
         if not r_id.isdigit():
             r_id = f'ref:{r_id}'
 
-        result.update({
+        result.update(
+            {
                 '_type': 'url_transparent',
                 'url': smuggle_url(
-                self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
+                    self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
+                    {'geo_countries': ['JP']},
+                ),
                 'ie_key': 'BrightcoveNew',
-        })
+            },
+        )
 
         return result
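For reference, the Brightcove branch just defers to BrightcoveNewIE through the stock player URL; the ids below are placeholders standing in for the account and reference ids resolved earlier in this method:

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
print(BRIGHTCOVE_URL_TEMPLATE % ('1234567890001', 'ref:abcd1234'))
# http://players.brightcove.net/1234567890001/default_default/index.html?videoId=ref:abcd1234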
@@ -240,7 +327,7 @@ class TVerIE(InfoExtractor):
         if not ref_id.startswith('ref:'):
             ref_id = f'ref:{ref_id}'
 
-        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id, 'aa')
+        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id)
         self.write_debug(f'Streaks URL: {url}')
 
         json_info = self._download_json(
@@ -254,26 +341,45 @@ class TVerIE(InfoExtractor):
             },
         )
 
-        res = traverse_obj(json_info, ('sources', 0, 'resolution'), default=None)
-        m3u8_url = traverse_obj(json_info, ('sources', 0, 'src'), default=False)
-        if not m3u8_url:
-            raise ExtractorError('Failed to extract m3u8 URL')
+        self.write_debug(json.dumps(json_info, indent=2, ensure_ascii=False))
 
-        self.write_debug(f'M3U8 URL: {m3u8_url}')
+        sources = traverse_obj(json_info, ('sources'), default=[])
+        formats = []
+        subtitles = {}
 
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            m3u8_url,
-            video_id,
-            'mp4',
-            m3u8_id='hls',
-            quality=res,
-            headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
-            note='Downloading streaks.jp m3u8 information',
-        )
+        for item in sources:
+            m3u8_url = traverse_obj(item, ('src'), default=None)
+            if not m3u8_url:
+                continue
+
+            item_formats, item_subtitles = self._extract_m3u8_formats_and_subtitles(
+                m3u8_url,
+                video_id,
+                'mp4',
+                m3u8_id='hls',
+                headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
+                note='Downloading streaks.jp m3u8 information',
+            )
+            if len(item_formats) > 0:
+                formats.extend(item_formats)
+            if len(item_subtitles) > 0:
+                # subtitles are a dict keyed by language, so merge rather than
+                # extend a list
+                self._merge_subtitles(item_subtitles, target=subtitles)
+
+        if len(formats) < 1:
+            raise ExtractorError('Failed to extract any m3u8 streams from streaks.jp video info')
 
-        result.update({
-            'id': video_id,
-            'formats': formats,
-            'subtitles': subtitles,
-        })
+        result.update(
+            {
+                'id': video_id,
+                'formats': formats,
+                'subtitles': subtitles,
+            },
+        )
 
         duration = float_or_none(json_info.get('duration'), 1000)
         if duration:
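The multi-stream loop consumes only `sources[].src` and the top-level millisecond `duration` from the STREAKS playback payload. A self-contained sketch of that selection logic against a mocked response (the payload shape is assumed from those two fields alone):

from yt_dlp.utils import float_or_none

json_info = {  # mocked STREAKS payload; only the fields the loop reads
    'duration': 1754000,  # milliseconds
    'sources': [
        {'src': 'https://example.com/primary/playlist.m3u8'},
        {'src': None},  # skipped by the `continue` branch, no longer fatal
        {'src': 'https://example.com/secondary/playlist.m3u8'},
    ],
}

m3u8_urls = []
for item in json_info.get('sources') or []:
    src = item.get('src')
    if not src:
        continue  # mirrors the loop above: entries without src are skipped
    m3u8_urls.append(src)

assert len(m3u8_urls) == 2
assert float_or_none(json_info.get('duration'), 1000) == 1754.0  # seconds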
