revert formatting and apply suggestions

pull/12659/head
bashonly authored 4 months ago · committed by GitHub
parent 141d2403af
commit cb22dd37f1

--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -1,28 +1,32 @@
 import datetime as dt
+import json
 import re
+import urllib.parse
 
 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
     join_nonempty,
+    mimetype2ext,
+    parse_iso8601,
     qualities,
-    require,
     smuggle_url,
     str_or_none,
     strip_or_none,
-    traverse_obj,
     update_url_query,
+    url_or_none,
 )
+from ..utils.traversal import require, traverse_obj
 
 
 class TVerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
     _GEO_COUNTRIES = ['JP']
     _GEO_BYPASS = False
-    _TESTS = [
-        {
+    _TESTS = [{
         'skip': 'videos are only available for 7 days',
         'url': 'https://tver.jp/episodes/ep83nf3w4p',
         'info_dict': {
@@ -35,16 +39,13 @@ class TVerIE(InfoExtractor):
             'id': 'ep83nf3w4p',
             'ext': 'mp4',
         },
-        },
-        {
+    }, {
         'url': 'https://tver.jp/corner/f0103888',
         'only_matching': True,
-        },
-        {
+    }, {
         'url': 'https://tver.jp/lp/f0033031',
         'only_matching': True,
-        },
-        {
+    }, {
         'url': 'https://tver.jp/series/srtxft431v',
         'info_dict': {
             'id': 'srtxft431v',
@@ -69,92 +70,54 @@ class TVerIE(InfoExtractor):
                 'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
                 'thumbnail': r're:https://.+\.jpg',
             },
-                },
-            ],
-        },
-        {
+        }],
+    }, {
         'url': 'https://tver.jp/series/sru35hwdd2',
         'info_dict': {
             'id': 'sru35hwdd2',
             'title': '神回だけ見せます!',
         },
         'playlist_count': 11,
-        },
-        {
+    }, {
         'url': 'https://tver.jp/series/srkq2shp9d',
         'only_matching': True,
-        },
-    ]
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s'
 
     _HEADERS = {
         'x-tver-platform-type': 'web',
-        'origin': 'https://tver.jp/',
-        'referer': 'https://tver.jp/',
+        'Origin': 'https://tver.jp',
+        'Referer': 'https://tver.jp/',
     }
     _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
         session_info = self._download_json(
             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
-            None,
-            'Creating session',
-            data=b'device_type=pc',
-        )
-        self._PLATFORM_QUERY = traverse_obj(
-            session_info,
-            (
-                'result',
-                {
+            None, 'Creating session', data=b'device_type=pc')
+        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
             'platform_uid': 'platform_uid',
             'platform_token': 'platform_token',
-                },
-            ),
-        )
+        }))
 
     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
         return self._download_json(
-            f'https://platform-api.tver.jp/service/api/{path}',
-            video_id,
-            note,
-            fatal=fatal,
-            headers=self._HEADERS,
-            query={
+            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
+            fatal=fatal, headers=self._HEADERS, query={
                 **self._PLATFORM_QUERY,
                 **(query or {}),
-            },
-        )
+            })
 
     def _yield_episode_ids_for_series(self, series_id):
         seasons_info = self._download_json(
             f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
-            series_id,
-            'Downloading seasons info',
-            headers=self._HEADERS,
-        )
+            series_id, 'Downloading seasons info', headers=self._HEADERS)
         for season_id in traverse_obj(
-            seasons_info,
-            ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
-        ):
+                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
             episodes_info = self._call_platform_api(
-                f'v1/callSeasonEpisodes/{season_id}',
-                series_id,
-                f'Downloading season {season_id} episodes info',
-            )
-            yield from traverse_obj(
-                episodes_info,
-                (
-                    'result',
-                    'contents',
-                    lambda _, v: v['type'] == 'episode',
-                    'content',
-                    'id',
-                    {str},
-                ),
-            )
+                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
            yield from traverse_obj(episodes_info, (
                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
@@ -163,49 +126,36 @@ class TVerIE(InfoExtractor):
             raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
 
         if video_type == 'series':
-            series_info = self._call_platform_api(f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            series_info = self._call_platform_api(
+                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
             return self.playlist_from_matches(
-                self._yield_episode_ids_for_series(video_id),
-                video_id,
+                self._yield_episode_ids_for_series(video_id), video_id,
                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
-                ie=TVerIE,
-                getter=lambda x: f'https://tver.jp/episodes/{x}',
-            )
+                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
 
         if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
-            video_id = self._match_id(
-                self._search_regex(
-                    (
-                        r'canonical"\s*href="(https?://tver\.jp/[^"]+)"',
-                        r'&link=(https?://tver\.jp/[^?&]+)[?&]',
-                    ),
-                    webpage,
-                    'url regex',
-                ),
-            )
+            video_id = self._match_id(self._search_regex(
+                (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
+                webpage, 'url regex'))
 
         episode_info = self._call_platform_api(
-            f'v1/callEpisode/{video_id}',
-            video_id,
-            'Downloading episode info',
-            fatal=False,
-            query={
+            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            },
-        )
-        episode_content = traverse_obj(episode_info, ('result', 'episode', 'content')) or {}
+            })
+        episode_content = traverse_obj(
+            episode_info, ('result', 'episode', 'content')) or {}
 
         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
         video_info = self._download_json(
             f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
-            query={'v': version}, headers={'Referer': 'https://tver.jp/'},
-        )
+            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
 
         episode = strip_or_none(episode_content.get('title'))
         series = str_or_none(episode_content.get('seriesTitle'))
-        title = join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))
+        title = (
+            join_nonempty(series, episode, delim=' ')
+            or str_or_none(video_info.get('title')))
 
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
@@ -214,8 +164,7 @@ class TVerIE(InfoExtractor):
                 'id': quality,
                 'url': update_url_query(
                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
-                    {'v': version},
-                ),
+                    {'v': version}),
                 'width': width,
                 'height': height,
             }
@@ -265,7 +214,14 @@ class TVerIE(InfoExtractor):
         if not ref_id.startswith('ref:'):
             ref_id = f'ref:{ref_id}'
 
-        return self._streaks_backend(metadata, video_info, video_id)
+        return {
+            **self._extract_from_streaks_api(video_info['streaks']['projectID'], ref_id, {
+                'Origin': 'https://tver.jp',
+                'Referer': 'https://tver.jp/',
+            }),
+            **metadata,
+            'id': video_id,
+        }
 
     def _format_broadcast_date(self, onair_label):
         """
@@ -284,23 +240,19 @@ class TVerIE(InfoExtractor):
         if not onair_label:
             return {}
 
-        match = re.search(
-            r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)', onair_label,
-        )
-        if not match:
+        mobj = re.search(
+            r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)', onair_label)
+        if not mobj:
             return {}
+        broadcast_date_info = mobj.groupdict()
 
-        broadcast_date_info = match.groupdict()
-        data = {}
         data = {
             'release_year': int_or_none(broadcast_date_info.get('year')),
         }
 
-        day, month = [int_or_none(broadcast_date_info.get(key)) for key in ('day', 'month')]
+        day, month = (int_or_none(broadcast_date_info.get(key)) for key in ('day', 'month'))
         if day and month:
             year = data.get('release_year') or dt.datetime.now().year
             dt_ = dt.datetime.strptime(f'{year}-{month}-{day}', '%Y-%m-%d')
             # If the date is in the future, it means the broadcast date is in the previous year
             # Ref: https://github.com/yt-dlp/yt-dlp/pull/12282#issuecomment-2678132806
             if dt_ > dt.datetime.now():
@@ -309,76 +261,111 @@ class TVerIE(InfoExtractor):
 
         return data
 
-    def _streaks_backend(self, result, video_info, video_id):
-        self.write_debug('Using streaks.jp backend')
-
-        ref_id = traverse_obj(video_info, ('streaks', 'videoRefID'), get_all=False)
-        project_id = traverse_obj(video_info, ('streaks', 'projectID'), get_all=False)
-        if not ref_id:
-            raise ExtractorError('Failed to extract reference ID for streaks.jp stream info')
-        if not project_id:
-            raise ExtractorError('Failed to extract project ID for streaks.jp stream info')
-
-        if not ref_id.startswith('ref:'):
-            ref_id = f'ref:{ref_id}'
-
-        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id)
-        self.write_debug(f'Streaks URL: {url}')
-
-        json_info = self._download_json(
-            url,
-            video_id,
-            'Downloading streaks.jp streams video info',
-            headers={
-                'origin': 'https://tver.jp/',
-                'referer': 'https://tver.jp/',
-                **self.geo_verification_headers(),
-            },
-        )
-
-        formats = []
-        subtitles = {}
-        audio_quality_func = qualities(('0', '1', '2'))
-
-        for item in traverse_obj(json_info, ('sources'), default=[]):
-            m3u8_url = traverse_obj(item, ('src'), default=None)
-            if not m3u8_url:
-                continue
-
-            item_formats, item_subtitles = self._extract_m3u8_formats_and_subtitles(
-                m3u8_url,
-                video_id,
-                'mp4',
-                m3u8_id='hls',
-                headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
-                note='Downloading streaks.jp m3u8 information',
-            )
-
-            for fmt in item_formats:
-                if mobj := re.match(r'hls-\w*?(?i:audio)-(?P<qual>\d)(?:_(?P<sub_qual>\d))?', fmt['format_id']):
-                    fmt['quality'] = audio_quality_func(mobj.group('qual')) * ((-1) ** bool(mobj.group('sub_qual')))
-
-            if len(item_formats) > 0:
-                formats.extend(item_formats)
-            if len(item_subtitles) > 0:
-                subtitles.update(item_subtitles)
-
-        if len(formats) < 1:
-            raise ExtractorError('Failed to extract any m3u8 streams from streaks.jp video info')
-
-        result.update(
-            {
-                'id': video_id,
-                'channel_id': project_id,
-                'formats': formats,
-                'subtitles': subtitles,
-            },
-        )
-
-        duration = float_or_none(json_info.get('duration'), 1000)
-        if duration:
-            result['duration'] = duration
-
-        return result
+    # XXX: Remove everything below and subclass TVerIE from StreaksBaseIE when #12679 is merged
+
+    _API_URL_TEMPLATE = 'https://{}.api.streaks.jp/v1/projects/{}/medias/{}{}'
+
+    def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False):
+        try:
+            response = self._download_json(
+                self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''),
+                media_id, 'Downloading streaks playback API JSON',
+                headers={
+                    'Accept': 'application/json',
+                    'Origin': 'https://players.streaks.jp',
+                    **self.geo_verification_headers(),
+                    **(headers or {}),
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}:
+                error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False)
+                message = traverse_obj(error, ('message', {str}))
+                code = traverse_obj(error, ('code', {str}))
+                if code == 'REQUEST_FAILED':
+                    self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES)
+                elif code == 'MEDIA_NOT_FOUND':
+                    raise ExtractorError(message, expected=True)
+                elif code or message:
+                    raise ExtractorError(join_nonempty(code, message, delim=': '))
+            raise
+
+        streaks_id = response['id']
+        live_status = {
+            'clip': 'was_live',
+            'file': 'not_live',
+            'linear': 'is_live',
+            'live': 'is_live',
+        }.get(response.get('type'))
+
+        audio_quality_func = qualities(('1', '0'))
+        formats, subtitles = [], {}
+        drm_formats = False
+
+        for source in traverse_obj(response, ('sources', lambda _, v: v['src'])):
+            if source.get('key_systems'):
+                drm_formats = True
+                continue
+
+            src_url = source['src']
+            is_live = live_status == 'is_live'
+            ext = mimetype2ext(source.get('type'))
+
+            if ext == 'm3u8':
+                if is_live and ssai:
+                    session_params = traverse_obj(
+                        self._download_json(
+                            self._API_URL_TEMPLATE.format('ssai', project_id, streaks_id, '/ssai/session'),
+                            media_id, 'Downloading session parameters',
+                            headers={'Content-Type': 'application/json'},  # XXX: geo_verification_headers ?
+                            data=json.dumps({'id': source['id']}).encode()),
+                        (0, 'query', {urllib.parse.parse_qs}))
+                    src_url = update_url_query(src_url, session_params)
+
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    src_url, media_id, 'mp4', m3u8_id='hls',
+                    fatal=False, live=is_live, query=query)
+
+                for fmt in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none'):
+                    if mobj := re.match(r'hls-[a-z]+_AUDIO-(?P<quality>\d)_\d+-', fmt['format_id']):
+                        fmt['quality'] = audio_quality_func(mobj.group('quality'))
+
+            elif ext == 'mpd':
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    src_url, media_id, mpd_id='dash', fatal=False)
+
+            else:
+                self.report_warning(f'Unsupported stream type: {ext}')
+                continue
+
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        if not formats and drm_formats:
+            self.report_drm(media_id)
+        self._remove_duplicate_formats(formats)
+
+        for subs in traverse_obj(response, (
+            'tracks', lambda _, v: v['kind'] in ('subtitles', 'captions') and url_or_none(v['src']),
+        )):
+            lang = traverse_obj(subs, ('srclang', {str.lower})) or 'ja'
+            subtitles.setdefault(lang, []).append({'url': subs['src']})
+
+        return {
+            'id': streaks_id,
+            'display_id': media_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'live_status': live_status,
+            **traverse_obj(response, {
+                'channel_id': ('project_id', {str}),
+                'uploader_id': ('profile', {str}),
+                'title': ('name', {str}),
+                'description': ('description', {str}, filter),
+                'duration': ('duration', {float_or_none}),
+                'tags': ('tags', ..., {str}),
+                'thumbnails': (('poster', 'thumbnail'), 'src', {'url': {url_or_none}}),
+                'timestamp': ('created_at', {parse_iso8601}),
+                'modified_timestamp': ('updated_at', {parse_iso8601}),
+            }),
+        }
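
The year-rollover rule in _format_broadcast_date is easiest to check in isolation. A minimal standalone sketch of the same logic (the regex and the previous-year fallback are copied from the diff; the free function and sample labels are illustrative only, not part of the extractor):

import datetime as dt
import re

def format_broadcast_date(onair_label):
    # Same pattern as the extractor: the label carries either a year
    # ('2024年') or a month/day pair ('10月5日'), never both branches at once
    mobj = re.search(
        r'(?:(?P<year>\d{4})年)|(?:(?P<month>\d{1,2})\D(?P<day>\d{1,2})\D)', onair_label)
    if not mobj:
        return {}
    info = mobj.groupdict()
    data = {'release_year': int(info['year']) if info['year'] else None}
    if info['month'] and info['day']:
        month, day = int(info['month']), int(info['day'])
        year = data['release_year'] or dt.datetime.now().year
        # A month/day that lands in the future must have aired last year
        if dt.datetime(year, month, day) > dt.datetime.now():
            year -= 1
        data['release_timestamp'] = dt.datetime(year, month, day).timestamp()
    return data

print(format_broadcast_date('読売テレビ 10月5日(土)放送分'))  # month/day branch
print(format_broadcast_date('2024年放送'))  # year-only branch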
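
Likewise, the _API_URL_TEMPLATE expansion and the HLS audio-quality handling can be exercised standalone. In the sketch below, the template and regex are taken verbatim from the diff; the project, media, and format IDs are made up, and qualities() is a minimal stand-in for yt_dlp.utils.qualities (position in the tuple, so later entries rank higher in format sorting):

import re

API_URL_TEMPLATE = 'https://{}.api.streaks.jp/v1/projects/{}/medias/{}{}'

# Playback metadata endpoint vs. SSAI session endpoint (hypothetical IDs)
print(API_URL_TEMPLATE.format('playback', 'example-project', 'ref:example-media', ''))
# -> https://playback.api.streaks.jp/v1/projects/example-project/medias/ref:example-media
print(API_URL_TEMPLATE.format('ssai', 'example-project', 'example-media', '/ssai/session'))
# -> https://ssai.api.streaks.jp/v1/projects/example-project/medias/example-media/ssai/session

def qualities(quality_ids):
    # Stand-in for yt_dlp.utils.qualities: index into the tuple, -1 if unknown
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

audio_quality_func = qualities(('1', '0'))
fmt_id = 'hls-ja_AUDIO-0_134-128000'  # hypothetical HLS audio format_id
if mobj := re.match(r'hls-[a-z]+_AUDIO-(?P<quality>\d)_\d+-', fmt_id):
    # '0' maps to 1 and '1' to 0, so AUDIO-0 renditions sort higher
    print(audio_quality_func(mobj.group('quality')))  # 1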
