[Tver] implemented changes requested by bashonly to improve the code

Branch: pull/12659/head
Author: ArabCoders, 4 months ago
Parent: 1a45fcd18a
Commit: 141d2403af

--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -1,123 +1,160 @@
 import datetime as dt
-import json
 import re
-import urllib.parse
 
 from .common import InfoExtractor
-from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
     join_nonempty,
-    mimetype2ext,
-    parse_iso8601,
     qualities,
+    require,
     smuggle_url,
     str_or_none,
     strip_or_none,
+    traverse_obj,
     update_url_query,
-    url_or_none,
 )
-from ..utils.traversal import require, traverse_obj
 
 
 class TVerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
     _GEO_COUNTRIES = ['JP']
     _GEO_BYPASS = False
-    _TESTS = [{
-        'skip': 'videos are only available for 7 days',
-        'url': 'https://tver.jp/episodes/ep83nf3w4p',
-        'info_dict': {
-            'title': '家事ヤロウ!!! 売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-            'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
-            'series': '家事ヤロウ!!!',
-            'episode': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-            'alt_title': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-            'channel': 'テレビ朝日',
-            'id': 'ep83nf3w4p',
-            'ext': 'mp4',
-        },
-    }, {
-        'url': 'https://tver.jp/corner/f0103888',
-        'only_matching': True,
-    }, {
-        'url': 'https://tver.jp/lp/f0033031',
-        'only_matching': True,
-    }, {
-        'url': 'https://tver.jp/series/srtxft431v',
-        'info_dict': {
-            'id': 'srtxft431v',
-            'title': '名探偵コナン',
-        },
-        'playlist': [
-            {
-                'md5': '779ffd97493ed59b0a6277ea726b389e',
-                'info_dict': {
-                    'id': 'ref:conan-1137-241005',
-                    'ext': 'mp4',
-                    'title': '名探偵コナン #1137「行列店、味変の秘密」',
-                    'uploader_id': '5330942432001',
-                    'tags': [],
-                    'channel': '読売テレビ',
-                    'series': '名探偵コナン',
-                    'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
-                    'episode': '#1137「行列店、味変の秘密」',
-                    'duration': 1469.077,
-                    'timestamp': 1728030405,
-                    'upload_date': '20241004',
-                    'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
-                    'thumbnail': r're:https://.+\.jpg',
-                },
-            }],
-    }, {
-        'url': 'https://tver.jp/series/sru35hwdd2',
-        'info_dict': {
-            'id': 'sru35hwdd2',
-            'title': '神回だけ見せます!',
-        },
-        'playlist_count': 11,
-    }, {
-        'url': 'https://tver.jp/series/srkq2shp9d',
-        'only_matching': True,
-    }]
+    _TESTS = [
+        {
+            'skip': 'videos are only available for 7 days',
+            'url': 'https://tver.jp/episodes/ep83nf3w4p',
+            'info_dict': {
+                'title': '家事ヤロウ!!! 売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+                'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
+                'series': '家事ヤロウ!!!',
+                'episode': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+                'alt_title': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+                'channel': 'テレビ朝日',
+                'id': 'ep83nf3w4p',
+                'ext': 'mp4',
+            },
+        },
+        {
+            'url': 'https://tver.jp/corner/f0103888',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://tver.jp/lp/f0033031',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://tver.jp/series/srtxft431v',
+            'info_dict': {
+                'id': 'srtxft431v',
+                'title': '名探偵コナン',
+            },
+            'playlist': [
+                {
+                    'md5': '779ffd97493ed59b0a6277ea726b389e',
+                    'info_dict': {
+                        'id': 'ref:conan-1137-241005',
+                        'ext': 'mp4',
+                        'title': '名探偵コナン #1137「行列店、味変の秘密」',
+                        'uploader_id': '5330942432001',
+                        'tags': [],
+                        'channel': '読売テレビ',
+                        'series': '名探偵コナン',
+                        'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
+                        'episode': '#1137「行列店、味変の秘密」',
+                        'duration': 1469.077,
+                        'timestamp': 1728030405,
+                        'upload_date': '20241004',
+                        'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
+                        'thumbnail': r're:https://.+\.jpg',
+                    },
+                },
+            ],
+        },
+        {
+            'url': 'https://tver.jp/series/sru35hwdd2',
+            'info_dict': {
+                'id': 'sru35hwdd2',
+                'title': '神回だけ見せます!',
+            },
+            'playlist_count': 11,
+        },
+        {
+            'url': 'https://tver.jp/series/srkq2shp9d',
+            'only_matching': True,
+        },
+    ]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s'
 
     _HEADERS = {
         'x-tver-platform-type': 'web',
-        'Origin': 'https://tver.jp',
-        'Referer': 'https://tver.jp/',
+        'origin': 'https://tver.jp/',
+        'referer': 'https://tver.jp/',
     }
     _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
         session_info = self._download_json(
             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
-            None, 'Creating session', data=b'device_type=pc')
-        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
-            'platform_uid': 'platform_uid',
-            'platform_token': 'platform_token',
-        }))
+            None,
+            'Creating session',
+            data=b'device_type=pc',
+        )
+        self._PLATFORM_QUERY = traverse_obj(
+            session_info,
+            (
+                'result',
+                {
+                    'platform_uid': 'platform_uid',
+                    'platform_token': 'platform_token',
+                },
+            ),
+        )
 
     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
         return self._download_json(
-            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
-            fatal=fatal, headers=self._HEADERS, query={
+            f'https://platform-api.tver.jp/service/api/{path}',
+            video_id,
+            note,
+            fatal=fatal,
+            headers=self._HEADERS,
+            query={
                 **self._PLATFORM_QUERY,
                 **(query or {}),
-            })
+            },
+        )
 
     def _yield_episode_ids_for_series(self, series_id):
         seasons_info = self._download_json(
             f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
-            series_id, 'Downloading seasons info', headers=self._HEADERS)
+            series_id,
+            'Downloading seasons info',
+            headers=self._HEADERS,
+        )
         for season_id in traverse_obj(
-                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
+            seasons_info,
+            ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
+        ):
             episodes_info = self._call_platform_api(
-                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
-            yield from traverse_obj(episodes_info, (
-                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
+                f'v1/callSeasonEpisodes/{season_id}',
+                series_id,
+                f'Downloading season {season_id} episodes info',
+            )
+            yield from traverse_obj(
+                episodes_info,
+                (
+                    'result',
+                    'contents',
+                    lambda _, v: v['type'] == 'episode',
+                    'content',
+                    'id',
+                    {str},
+                ),
+            )
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
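
A note on the `traverse_obj` paths that both sides of this hunk lean on: a callable inside a path filters list items, a trailing `{str}` keeps only string values, and a dict inside a path projects fields into a new mapping. A minimal standalone sketch with a made-up payload (not part of the commit):

from yt_dlp.utils import traverse_obj

# Payload shape assumed from the callSeriesSeasons handling above
seasons_info = {'result': {'contents': [
    {'type': 'season', 'content': {'id': 's01'}},
    {'type': 'banner', 'content': {'id': 'not-a-season'}},
    {'type': 'season', 'content': {'id': 's02'}},
]}}

# A callable in the path filters list items; {str} keeps only str results
assert traverse_obj(
    seasons_info,
    ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
) == ['s01', 's02']

# A dict in the path projects keys into a new mapping, as in _real_initialize
session_info = {'result': {'platform_uid': 'u1', 'platform_token': 't1', 'member_sid': 'x'}}
assert traverse_obj(session_info, ('result', {
    'platform_uid': 'platform_uid',
    'platform_token': 'platform_token',
})) == {'platform_uid': 'u1', 'platform_token': 't1'}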
@@ -126,36 +163,49 @@ class TVerIE(InfoExtractor):
             raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
 
         if video_type == 'series':
-            series_info = self._call_platform_api(
-                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            series_info = self._call_platform_api(f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
             return self.playlist_from_matches(
-                self._yield_episode_ids_for_series(video_id), video_id,
+                self._yield_episode_ids_for_series(video_id),
+                video_id,
                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
-                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
+                ie=TVerIE,
+                getter=lambda x: f'https://tver.jp/episodes/{x}',
+            )
 
         if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
-            video_id = self._match_id(self._search_regex(
-                (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
-                webpage, 'url regex'))
+            video_id = self._match_id(
+                self._search_regex(
+                    (
+                        r'canonical"\s*href="(https?://tver\.jp/[^"]+)"',
+                        r'&link=(https?://tver\.jp/[^?&]+)[?&]',
+                    ),
+                    webpage,
+                    'url regex',
+                ),
+            )
 
         episode_info = self._call_platform_api(
-            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
+            f'v1/callEpisode/{video_id}',
+            video_id,
+            'Downloading episode info',
+            fatal=False,
+            query={
                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            })
-        episode_content = traverse_obj(
-            episode_info, ('result', 'episode', 'content')) or {}
+            },
+        )
+        episode_content = traverse_obj(episode_info, ('result', 'episode', 'content')) or {}
 
         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
         video_info = self._download_json(
             f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
-            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
+            query={'v': version}, headers={'Referer': 'https://tver.jp/'},
+        )
 
         episode = strip_or_none(episode_content.get('title'))
         series = str_or_none(episode_content.get('seriesTitle'))
-        title = (
-            join_nonempty(series, episode, delim=' ')
-            or str_or_none(video_info.get('title')))
+        title = join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
@@ -164,7 +214,8 @@ class TVerIE(InfoExtractor):
                 'id': quality,
                 'url': update_url_query(
                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
-                    {'v': version}),
+                    {'v': version},
+                ),
                 'width': width,
                 'height': height,
             }
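
For reference, `update_url_query` simply merges the query dict into the URL; this hunk only moves the closing parenthesis. A quick standalone check with illustrative quality and version values (assumed, not taken from the commit):

from yt_dlp.utils import update_url_query

thumb = update_url_query(
    'https://statics.tver.jp/images/content/thumbnail/episode/small/ep83nf3w4p.jpg',
    {'v': '5'},  # '5' mirrors the extractor's default episode version above
)
assert thumb == 'https://statics.tver.jp/images/content/thumbnail/episode/small/ep83nf3w4p.jpg?v=5'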
@@ -214,14 +265,7 @@ class TVerIE(InfoExtractor):
         if not ref_id.startswith('ref:'):
             ref_id = f'ref:{ref_id}'
 
-        return {
-            **self._extract_from_streaks_api(video_info['streaks']['projectID'], ref_id, {
-                'Origin': 'https://tver.jp',
-                'Referer': 'https://tver.jp/',
-            }),
-            **metadata,
-            'id': video_id,
-        }
+        return self._streaks_backend(metadata, video_info, video_id)
 
     def _format_broadcast_date(self, onair_label):
         """
@@ -241,8 +285,7 @@ class TVerIE(InfoExtractor):
             return {}
 
         match = re.search(
-            pattern=r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)',
-            string=onair_label,
+            r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)', onair_label,
         )
 
         if not match:
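
Only the call style changes here; the pattern itself is identical on both sides. Worth noting for review: the two alternatives are mutually exclusive, so a broadcast label yields either a year or a month/day pair, never both, which is why the year must be inferred afterwards. A standalone check with sample labels (assumed, not from the commit):

import re

BROADCAST_RE = r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)'

# A label like 1月2日(木)放送分 carries only the month and day...
m = re.search(BROADCAST_RE, '1月2日(木)放送分')
assert m.groupdict() == {'year': None, 'month': '1', 'day': '2'}

# ...while 2024年1月2日 matches the leftmost alternative and yields only the year
m = re.search(BROADCAST_RE, '2024年1月2日')
assert m.groupdict() == {'year': '2024', 'month': None, 'day': None}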
@@ -250,138 +293,92 @@ class TVerIE(InfoExtractor):
         data = {}
-        broadcast_date = match.groupdict()
-
-        if broadcast_date.get('year'):
-            data['release_year'] = int(broadcast_date['year'])
-
-        if broadcast_date.get('day') and broadcast_date.get('month'):
-            if 'release_year' in data:
-                year = data['release_year']
-            else:
-                year = dt.datetime.now().year
-            dt_ = dt.datetime.strptime(
-                f"{year}-{broadcast_date['month']}-{broadcast_date['day']}",
-                '%Y-%m-%d',
-            )
-            # if the date in the future, it means the broadcast date is in the previous year
-            # ref: https://github.com/yt-dlp/yt-dlp/pull/12282#issuecomment-2678132806
-            if dt_ > dt.datetime.now():
-                year -= 1
-            data['release_timestamp'] = dt.datetime(
-                year=year,
-                month=int(broadcast_date['month']),
-                day=int(broadcast_date['day']),
-            ).timestamp()
+        broadcast_date_info = match.groupdict()
+        data = {
+            'release_year': int_or_none(broadcast_date_info.get('year')),
+        }
+
+        day, month = [int_or_none(broadcast_date_info.get(key)) for key in ('day', 'month')]
+        if day and month:
+            year = data.get('release_year') or dt.datetime.now().year
+            dt_ = dt.datetime.strptime(f'{year}-{month}-{day}', '%Y-%m-%d')
+            # If the date is in the future, it means the broadcast date is in the previous year
+            # Ref: https://github.com/yt-dlp/yt-dlp/pull/12282#issuecomment-2678132806
+            if dt_ > dt.datetime.now():
+                year -= 1
+            data['release_timestamp'] = dt.datetime(year=year, month=month, day=day).timestamp()
 
         return data
 
-    # XXX: Remove everything below and subclass TVerIE from StreaksBaseIE when #12679 is merged
-    _API_URL_TEMPLATE = 'https://{}.api.streaks.jp/v1/projects/{}/medias/{}{}'
-
-    def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False):
-        try:
-            response = self._download_json(
-                self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''),
-                media_id, 'Downloading streaks playback API JSON',
-                headers={
-                    'Accept': 'application/json',
-                    'Origin': 'https://players.streaks.jp',
-                    **self.geo_verification_headers(),
-                    **(headers or {}),
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}:
-                error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False)
-                message = traverse_obj(error, ('message', {str}))
-                code = traverse_obj(error, ('code', {str}))
-                if code == 'REQUEST_FAILED':
-                    self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES)
-                elif code == 'MEDIA_NOT_FOUND':
-                    raise ExtractorError(message, expected=True)
-                elif code or message:
-                    raise ExtractorError(join_nonempty(code, message, delim=': '))
-            raise
-
-        streaks_id = response['id']
-        live_status = {
-            'clip': 'was_live',
-            'file': 'not_live',
-            'linear': 'is_live',
-            'live': 'is_live',
-        }.get(response.get('type'))
-
-        audio_quality_func = qualities(('1', '0'))
-        formats, subtitles = [], {}
-        drm_formats = False
-
-        for source in traverse_obj(response, ('sources', lambda _, v: v['src'])):
-            if source.get('key_systems'):
-                drm_formats = True
-                continue
-
-            src_url = source['src']
-            is_live = live_status == 'is_live'
-            ext = mimetype2ext(source.get('type'))
-
-            if ext == 'm3u8':
-                if is_live and ssai:
-                    session_params = traverse_obj(
-                        self._download_json(
-                            self._API_URL_TEMPLATE.format('ssai', project_id, streaks_id, '/ssai/session'),
-                            media_id, 'Downloading session parameters',
-                            headers={'Content-Type': 'application/json'},  # XXX: geo_verification_headers ?
-                            data=json.dumps({'id': source['id']}).encode()),
-                        (0, 'query', {urllib.parse.parse_qs}))
-                    src_url = update_url_query(src_url, session_params)
-
-                fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    src_url, media_id, 'mp4', m3u8_id='hls',
-                    fatal=False, live=is_live, query=query)
-
-                for fmt in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none'):
-                    if mobj := re.match(r'hls-[a-z]+_AUDIO-(?P<quality>\d)_\d+-', fmt['format_id']):
-                        fmt['quality'] = audio_quality_func(mobj.group('quality'))
-            elif ext == 'mpd':
-                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    src_url, media_id, mpd_id='dash', fatal=False)
-            else:
-                self.report_warning(f'Unsupported stream type: {ext}')
-                continue
-
-            formats.extend(fmts)
-            self._merge_subtitles(subs, target=subtitles)
-
-        if not formats and drm_formats:
-            self.report_drm(media_id)
-
-        self._remove_duplicate_formats(formats)
-
-        for subs in traverse_obj(response, (
-            'tracks', lambda _, v: v['kind'] in ('subtitles', 'captions') and url_or_none(v['src']),
-        )):
-            lang = traverse_obj(subs, ('srclang', {str.lower})) or 'ja'
-            subtitles.setdefault(lang, []).append({'url': subs['src']})
-
-        return {
-            'id': streaks_id,
-            'display_id': media_id,
-            'channel_id': project_id,
-            'formats': formats,
-            'subtitles': subtitles,
-            'live_status': live_status,
-            **traverse_obj(response, {
-                'channel_id': ('project_id', {str}),
-                'uploader_id': ('profile', {str}),
-                'title': ('name', {str}),
-                'description': ('description', {str}, filter),
-                'duration': ('duration', {float_or_none}),
-                'tags': ('tags', ..., {str}),
-                'thumbnails': (('poster', 'thumbnail'), 'src', {'url': {url_or_none}}),
-                'timestamp': ('created_at', {parse_iso8601}),
-                'modified_timestamp': ('updated_at', {parse_iso8601}),
-            }),
-        }
+    def _streaks_backend(self, result, video_info, video_id):
+        self.write_debug('Using streaks.jp backend')
+
+        ref_id = traverse_obj(video_info, ('streaks', 'videoRefID'), get_all=False)
+        project_id = traverse_obj(video_info, ('streaks', 'projectID'), get_all=False)
+
+        if not ref_id:
+            raise ExtractorError('Failed to extract reference ID for streaks.jp stream info')
+
+        if not project_id:
+            raise ExtractorError('Failed to extract project ID for streaks.jp stream info')
+
+        if not ref_id.startswith('ref:'):
+            ref_id = f'ref:{ref_id}'
+
+        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id)
+        self.write_debug(f'Streaks URL: {url}')
+
+        json_info = self._download_json(
+            url,
+            video_id,
+            'Downloading streaks.jp streams video info',
+            headers={
+                'origin': 'https://tver.jp/',
+                'referer': 'https://tver.jp/',
+                **self.geo_verification_headers(),
+            },
+        )
+
+        formats = []
+        subtitles = {}
+        audio_quality_func = qualities(('0', '1', '2'))
+
+        for item in traverse_obj(json_info, ('sources'), default=[]):
+            m3u8_url = traverse_obj(item, ('src'), default=None)
+            if not m3u8_url:
+                continue
+
+            item_formats, item_subtitles = self._extract_m3u8_formats_and_subtitles(
+                m3u8_url,
+                video_id,
+                'mp4',
+                m3u8_id='hls',
+                headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
+                note='Downloading streaks.jp m3u8 information',
+            )
+
+            for fmt in item_formats:
+                if mobj := re.match(r'hls-\w*?(?i:audio)-(?P<qual>\d)(?:_(?P<sub_qual>\d))?', fmt['format_id']):
+                    fmt['quality'] = audio_quality_func(mobj.group('qual')) * ((-1) ** bool(mobj.group('sub_qual')))
+
+            if len(item_formats) > 0:
+                formats.extend(item_formats)
+
+            if len(item_subtitles) > 0:
+                subtitles.update(item_subtitles)
+
+        if len(formats) < 1:
+            raise ExtractorError('Failed to extract any m3u8 streams from streaks.jp video info')
+
+        result.update(
+            {
+                'id': video_id,
+                'formats': formats,
+                'subtitles': subtitles,
+            },
+        )
+
+        duration = float_or_none(json_info.get('duration'), 1000)
+        if duration:
+            result['duration'] = duration
+
+        return result
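
The year-inference rule retained on both sides of this hunk is easiest to verify with concrete dates. A self-contained sketch of the same logic with assumed "today" values (not part of the commit):

import datetime as dt

def infer_release_year(month, day, today):
    # Same rule as _format_broadcast_date: assume the current year, then roll
    # back one year if that would place the broadcast in the future
    year = today.year
    if dt.datetime(year, month, day) > today:
        year -= 1
    return year

# A label like 12月30日 seen on 2025-01-05 must refer to 2024...
assert infer_release_year(12, 30, dt.datetime(2025, 1, 5)) == 2024
# ...while 1月2日 seen the same day refers to 2025
assert infer_release_year(1, 2, dt.datetime(2025, 1, 5)) == 2025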
