revert formatting changes, refactor/rewrite

Branch: pull/12659/head
Author: bashonly (committed by GitHub, 4 months ago)
Parent: 2b39e34b3f
Commit: f05f3a9ef5

@@ -1,128 +1,113 @@
-import datetime
+import datetime as dt
+import json
 import re
+import urllib.parse
 
 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,
+    int_or_none,
     join_nonempty,
+    mimetype2ext,
+    parse_iso8601,
     qualities,
     smuggle_url,
     str_or_none,
     strip_or_none,
-    traverse_obj,
     update_url_query,
+    url_or_none,
 )
+from ..utils.traversal import require, traverse_obj
 
 
 class TVerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
-    _TESTS = [
-        {
-            'skip': 'videos are only available for 7 days',
-            'url': 'https://tver.jp/episodes/ep83nf3w4p',
-            'info_dict': {
-                'title': '家事ヤロウ!!! 売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-                'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
-                'series': '家事ヤロウ!!!',
-                'episode': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-                'alt_title': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
-                'channel': 'テレビ朝日',
-                'id': 'ep83nf3w4p',
-                'ext': 'mp4',
-            },
-        },
-        {
-            'url': 'https://tver.jp/corner/f0103888',
-            'only_matching': True,
-        },
-        {
-            'url': 'https://tver.jp/lp/f0033031',
-            'only_matching': True,
-        },
-        {
-            'url': 'https://tver.jp/series/srtxft431v',
-            'info_dict': {
-                'id': 'srtxft431v',
-                'title': '名探偵コナン',
-            },
-            'playlist': [
-                {
-                    'md5': '779ffd97493ed59b0a6277ea726b389e',
-                    'info_dict': {
-                        'id': 'ref:conan-1137-241005',
-                        'ext': 'mp4',
-                        'title': '名探偵コナン #1137「行列店、味変の秘密」',
-                        'uploader_id': '5330942432001',
-                        'tags': [],
-                        'channel': '読売テレビ',
-                        'series': '名探偵コナン',
-                        'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
-                        'episode': '#1137「行列店、味変の秘密」',
-                        'duration': 1469.077,
-                        'timestamp': 1728030405,
-                        'upload_date': '20241004',
-                        'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
-                        'thumbnail': r're:https://.+\.jpg',
-                    },
-                },
-            ],
-        },
-        {
-            'url': 'https://tver.jp/series/sru35hwdd2',
-            'info_dict': {
-                'id': 'sru35hwdd2',
-                'title': '神回だけ見せます!',
-            },
-            'playlist_count': 11,
-        },
-        {
-            'url': 'https://tver.jp/series/srkq2shp9d',
-            'only_matching': True,
-        },
-    ]
+    _GEO_COUNTRIES = ['JP']
+    _GEO_BYPASS = False
+    _TESTS = [{
+        'skip': 'videos are only available for 7 days',
+        'url': 'https://tver.jp/episodes/ep83nf3w4p',
+        'info_dict': {
+            'title': '家事ヤロウ!!! 売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+            'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
+            'series': '家事ヤロウ!!!',
+            'episode': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+            'alt_title': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
+            'channel': 'テレビ朝日',
+            'id': 'ep83nf3w4p',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://tver.jp/corner/f0103888',
+        'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/lp/f0033031',
+        'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/series/srtxft431v',
+        'info_dict': {
+            'id': 'srtxft431v',
+            'title': '名探偵コナン',
+        },
+        'playlist': [
+            {
+                'md5': '779ffd97493ed59b0a6277ea726b389e',
+                'info_dict': {
+                    'id': 'ref:conan-1137-241005',
+                    'ext': 'mp4',
+                    'title': '名探偵コナン #1137「行列店、味変の秘密」',
+                    'uploader_id': '5330942432001',
+                    'tags': [],
+                    'channel': '読売テレビ',
+                    'series': '名探偵コナン',
+                    'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
+                    'episode': '#1137「行列店、味変の秘密」',
+                    'duration': 1469.077,
+                    'timestamp': 1728030405,
+                    'upload_date': '20241004',
+                    'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
+                    'thumbnail': r're:https://.+\.jpg',
+                },
+            }],
+    }, {
+        'url': 'https://tver.jp/series/sru35hwdd2',
+        'info_dict': {
+            'id': 'sru35hwdd2',
+            'title': '神回だけ見せます!',
+        },
+        'playlist_count': 11,
+    }, {
+        'url': 'https://tver.jp/series/srkq2shp9d',
+        'only_matching': True,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-    STREAKS_URL_TEMPLATE = 'https://playback.api.streaks.jp/v1/projects/%s/medias/%s'
 
     _HEADERS = {
         'x-tver-platform-type': 'web',
-        'origin': 'https://tver.jp/',
-        'referer': 'https://tver.jp/',
+        'Origin': 'https://tver.jp',
+        'Referer': 'https://tver.jp/',
     }
     _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
         session_info = self._download_json(
             'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
-            None,
-            'Creating session',
-            data=b'device_type=pc',
-        )
-        self._PLATFORM_QUERY = traverse_obj(
-            session_info,
-            (
-                'result',
-                {
-                    'platform_uid': 'platform_uid',
-                    'platform_token': 'platform_token',
-                },
-            ),
-        )
+            None, 'Creating session', data=b'device_type=pc')
+        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
+            'platform_uid': 'platform_uid',
+            'platform_token': 'platform_token',
+        }))
 
     def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
         return self._download_json(
-            f'https://platform-api.tver.jp/service/api/{path}',
-            video_id,
-            note,
-            fatal=fatal,
-            headers=self._HEADERS,
-            query={
+            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
+            fatal=fatal, headers=self._HEADERS, query={
                 **self._PLATFORM_QUERY,
                 **(query or {}),
-            },
-        )
+            })
 
     def _yield_episode_ids_for_series(self, series_id):
         seasons_info = self._download_json(
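Note: the session bootstrap in the hunk above (`_real_initialize`) is just an anonymous-user POST whose result feeds `_PLATFORM_QUERY`. A rough standalone sketch of the same call with plain `requests`; the endpoint and payload come from the diff, while the response shape and whether the platform header is required are assumptions:

```python
# Sketch only (assumed response shape); the extractor itself uses
# self._download_json() inside _real_initialize().
import requests

resp = requests.post(
    'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
    data={'device_type': 'pc'},  # matches data=b'device_type=pc' above
    headers={'x-tver-platform-type': 'web'},  # from _HEADERS; may not be required
)
result = resp.json().get('result') or {}
# equivalent of the traverse_obj(session_info, ('result', {...})) mapping
platform_query = {key: result.get(key) for key in ('platform_uid', 'platform_token')}
print(platform_query)
```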
@@ -132,76 +117,49 @@ class TVerIE(InfoExtractor):
             headers=self._HEADERS,
         )
         for season_id in traverse_obj(
-            seasons_info,
-            ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}),
-        ):
+                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
             episodes_info = self._call_platform_api(
-                f'v1/callSeasonEpisodes/{season_id}',
-                series_id,
-                f'Downloading season {season_id} episodes info',
-            )
-            yield from traverse_obj(
-                episodes_info,
-                (
-                    'result',
-                    'contents',
-                    lambda _, v: v['type'] == 'episode',
-                    'content',
-                    'id',
-                    {str},
-                ),
-            )
+                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
+            yield from traverse_obj(episodes_info, (
+                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
-        backend = self._configuration_arg('backend', ['streaks'])[0]
-        if backend not in ('brightcove', 'streaks'):
-            raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
 
         if video_type == 'series':
-            series_info = self._call_platform_api(f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            series_info = self._call_platform_api(
+                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
             return self.playlist_from_matches(
-                self._yield_episode_ids_for_series(video_id),
-                video_id,
+                self._yield_episode_ids_for_series(video_id), video_id,
                 traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
-                ie=TVerIE,
-                getter=lambda x: f'https://tver.jp/episodes/{x}',
-            )
+                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
 
         if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
-            video_id = self._match_id(
-                self._search_regex(
-                    (
-                        r'canonical"\s*href="(https?://tver\.jp/[^"]+)"',
-                        r'&link=(https?://tver\.jp/[^?&]+)[?&]',
-                    ),
-                    webpage,
-                    'url regex',
-                ),
-            )
+            video_id = self._match_id(self._search_regex(
+                (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
+                webpage, 'url regex'))
 
         episode_info = self._call_platform_api(
-            f'v1/callEpisode/{video_id}',
-            video_id,
-            'Downloading episode info',
-            fatal=False,
-            query={
+            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
                 'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            },
-        )
-        episode_content = traverse_obj(episode_info, ('result', 'episode', 'content')) or {}
+            })
+        episode_content = traverse_obj(
+            episode_info, ('result', 'episode', 'content')) or {}
 
         version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
         video_info = self._download_json(
-            f'https://statics.tver.jp/content/episode/{video_id}.json',
-            video_id,
-            'Downloading video info',
-            query={'v': version},
-            headers={'Referer': 'https://tver.jp/'},
-        )
+            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
+            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
 
         episode = strip_or_none(episode_content.get('title'))
         series = str_or_none(episode_content.get('seriesTitle'))
-        title = join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))
+        title = (
+            join_nonempty(series, episode, delim=' ')
+            or str_or_none(video_info.get('title')))
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
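Note: the compact `traverse_obj` paths that replace the multi-line ones in this hunk are worth unpacking. A small self-contained sketch of how the season-ID path above walks the API response, using dummy data in the shape the path implies:

```python
from yt_dlp.utils.traversal import traverse_obj

# Hypothetical callSeriesSeasons-style response (shape assumed from the path)
seasons_info = {'result': {'contents': [
    {'type': 'season', 'content': {'id': 's1'}},
    {'type': 'banner', 'content': {'id': 'not-a-season'}},
    {'type': 'season', 'content': {'id': 's2'}},
]}}

# The lambda branches over the list and keeps only season entries; the {str}
# set then filters out any non-string IDs, yielding a plain list of season IDs
season_ids = traverse_obj(seasons_info, (
    'result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str}))
assert season_ids == ['s1', 's2']
```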
@@ -210,8 +168,7 @@ class TVerIE(InfoExtractor):
                 'id': quality,
                 'url': update_url_query(
                     f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
-                    {'v': version},
-                ),
+                    {'v': version}),
                 'width': width,
                 'height': height,
             }
@@ -223,39 +180,52 @@ class TVerIE(InfoExtractor):
             ]
         ]
 
-        data = {
+        metadata = {
             'title': title,
             'series': series,
             'episode': episode,
             # an another title which is considered "full title" for some viewers
             'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
             'channel': provider,
-            'description': str_or_none(video_info.get('description')),
             'thumbnails': thumbnails,
+            **traverse_obj(video_info, {
+                'description': ('description', {str}),
+                'timestamp': ('viewStatus', 'startAt', {int_or_none}),
+                'episode_number': ('no', {int_or_none}),
+            }),
         }
 
-        ts = traverse_obj(video_info, ('viewStatus', 'startAt', {int}), default=None)
-        if ts:
-            data['timestamp'] = ts
-        episode_number = traverse_obj(video_info, ('no', {str_or_none}), default=None)
-        if episode_number:
-            data['episode_number'] = int(episode_number)
-
         if onair_label:
-            data.update(self._format_broadcast_date(onair_label))
+            metadata.update(self._format_broadcast_date(onair_label))
 
+        backend = self._configuration_arg('backend', ['streaks'])[0]
+        if backend not in ('brightcove', 'streaks'):
+            raise ExtractorError(f'Invalid backend value: {backend}', expected=True)
         if backend == 'brightcove':
-            data = self._brightcove_backend(data, video_info)
-        else:
-            data = self._streaks_backend(data, video_info, video_id)
+            p_id = video_info['video']['accountID']
+            r_id = traverse_obj(video_info, (
+                'video', ('videoRefID', 'videoID'), {str}, any, {require('reference ID')}))
+            if not r_id.isdecimal():
+                r_id = f'ref:{r_id}'
+            return {
+                **metadata,
+                '_type': 'url_transparent',
+                'url': smuggle_url(
+                    self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
+                    {'geo_countries': ['JP']}),
+                'ie_key': 'BrightcoveNew',
+            }
 
-        return data
+        ref_id = video_info['streaks']['videoRefID']
+        if not ref_id.startswith('ref:'):
+            ref_id = f'ref:{ref_id}'
+
+        return {
+            **self._extract_from_streaks_api(video_info['streaks']['projectID'], ref_id, {
+                'Origin': 'https://tver.jp',
+                'Referer': 'https://tver.jp/',
+            }),
+            **metadata,
+            'id': video_id,
+        }
 
     def _format_broadcast_date(self, onair_label):
         """
@@ -293,17 +263,17 @@ class TVerIE(InfoExtractor):
         if 'release_year' in data:
             year = data['release_year']
         else:
-            year = datetime.datetime.now().year
+            year = dt.datetime.now().year
-        dt = datetime.datetime.strptime(
+        dt_ = dt.datetime.strptime(
             f"{year}-{broadcast_date['month']}-{broadcast_date['day']}",
             '%Y-%m-%d',
         )
         # if the date in the future, it means the broadcast date is in the previous year
         # ref: https://github.com/yt-dlp/yt-dlp/pull/12282#issuecomment-2678132806
-        if dt > datetime.datetime.now():
+        if dt_ > dt.datetime.now():
             year -= 1
-        data['release_timestamp'] = datetime.datetime(
+        data['release_timestamp'] = dt.datetime(
             year=year,
             month=int(broadcast_date['month']),
             day=int(broadcast_date['day']),
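Note: beyond the `datetime` → `dt` alias rename (which also frees `dt` from shadowing the module), the logic in this hunk is unchanged: the broadcast label carries no year, so the current year is assumed and rolled back when the resulting date would be in the future. A standalone sketch of that rule, with a hypothetical helper name:

```python
import datetime as dt

def infer_broadcast_year(month: int, day: int, now: dt.datetime | None = None) -> int:
    # broadcastDateLabel strings like '10月5日(土)放送分' omit the year; assume
    # the current one, rolling back if that would put the broadcast in the future
    now = now or dt.datetime.now()
    if dt.datetime(year=now.year, month=month, day=day) > now:
        return now.year - 1
    return now.year

# e.g. a '12月30日' label seen on 2025-01-02 must refer to 2024
assert infer_broadcast_year(12, 30, now=dt.datetime(2025, 1, 2)) == 2024
```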
@@ -311,115 +281,111 @@ class TVerIE(InfoExtractor):
 
         return data
 
-    def _brightcove_backend(self, result, video_info):
-        self.write_debug('Using Brightcove backend')
-        p_id = video_info['video']['accountID']
-        r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
-
-        if not r_id:
-            raise ExtractorError('Failed to extract reference ID for Brightcove')
-
-        if not r_id.isdigit():
-            r_id = f'ref:{r_id}'
-
-        result.update(
-            {
-                '_type': 'url_transparent',
-                'url': smuggle_url(
-                    self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
-                    {'geo_countries': ['JP']},
-                ),
-                'ie_key': 'BrightcoveNew',
-            },
-        )
-
-        return result
-
-    def _streaks_backend(self, result, video_info, video_id):
-        self.write_debug('Using streaks.jp backend')
-
-        ref_id = traverse_obj(video_info, ('streaks', 'videoRefID'), get_all=False)
-        project_id = traverse_obj(video_info, ('streaks', 'projectID'), get_all=False)
-
-        if not ref_id:
-            raise ExtractorError('Failed to extract reference ID for streaks.jp stream info')
-
-        if not project_id:
-            raise ExtractorError('Failed to extract project ID for streaks.jp stream info')
-
-        if not ref_id.startswith('ref:'):
-            ref_id = f'ref:{ref_id}'
-
-        url = self.STREAKS_URL_TEMPLATE % (project_id, ref_id)
-        self.write_debug(f'Streaks URL: {url}')
-
-        json_info = self._download_json(
-            url,
-            video_id,
-            'Downloading streaks.jp streams video info',
-            headers={
-                'origin': 'https://tver.jp/',
-                'referer': 'https://tver.jp/',
-                **self.geo_verification_headers(),
-            },
-        )
-
-        sources = traverse_obj(json_info, ('sources'), default=[])
-        formats = []
-        subtitles = {}
-
-        for item in sources:
-            m3u8_url = traverse_obj(item, ('src'), default=None)
-            if not m3u8_url:
-                continue
-
-            item_formats, item_subtitles = self._extract_m3u8_formats_and_subtitles(
-                m3u8_url,
-                video_id,
-                'mp4',
-                m3u8_id='hls',
-                headers={'origin': 'https://tver.jp/', 'referer': 'https://tver.jp/'},
-                note='Downloading streaks.jp m3u8 information',
-            )
-
-            build_qualities = []
-
-            for f in item_formats:
-                # hls-ts_AUDIO-1_1-pro_105ba7d8d0f4452bb452acf09466b2a5
-                if mobj := re.search(r'hls-ts_AUDIO-(.+?)-', f['format_id']):
-                    build_qualities.append(mobj.group(1))
-
-            if len(build_qualities) > 0:
-                # it seems best quality is 0_X. see ref for more info
-                # ref: https://github.com/yt-dlp/yt-dlp/issues/12643#issuecomment-2745263178
-                build_qualities.sort(key=lambda x: int(x.split('_')[0]), reverse=True)
-                quality = qualities(tuple(build_qualities))
-
-                for f in item_formats:
-                    if mobj := re.search(r'hls-ts_AUDIO-(.+?)-', f['format_id']):
-                        f['quality'] = quality(mobj.group(1))
-
-            if len(item_formats) > 0:
-                formats.extend(item_formats)
-
-            if len(item_subtitles) > 0:
-                subtitles.update(item_subtitles)
-
-        if len(formats) < 1:
-            raise ExtractorError('Failed to extract any m3u8 streams from streaks.jp video info')
-
-        result.update(
-            {
-                'id': video_id,
-                'formats': formats,
-                'subtitles': subtitles,
-            },
-        )
-
-        duration = float_or_none(json_info.get('duration'), 1000)
-        if duration:
-            result['duration'] = duration
-
-        return result
+    # XXX: Remove everything below and subclass TVerIE from StreaksBaseIE when #12679 is merged
+    _API_URL_TEMPLATE = 'https://{}.api.streaks.jp/v1/projects/{}/medias/{}{}'
+
+    def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False):
+        try:
+            response = self._download_json(
+                self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''),
+                media_id, 'Downloading streaks playback API JSON',
+                headers={
+                    'Accept': 'application/json',
+                    'Origin': 'https://players.streaks.jp',
+                    **self.geo_verification_headers(),
+                    **(headers or {}),
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}:
+                error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False)
+                message = traverse_obj(error, ('message', {str}))
+                code = traverse_obj(error, ('code', {str}))
+                if code == 'REQUEST_FAILED':
+                    self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES)
+                elif code == 'MEDIA_NOT_FOUND':
+                    raise ExtractorError(message, expected=True)
+                elif code or message:
+                    raise ExtractorError(join_nonempty(code, message, delim=': '))
+            raise
+
+        streaks_id = response['id']
+        live_status = {
+            'clip': 'was_live',
+            'file': 'not_live',
+            'linear': 'is_live',
+            'live': 'is_live',
+        }.get(response.get('type'))
+
+        audio_quality_func = qualities(('1', '0'))
+        formats, subtitles = [], {}
+        drm_formats = False
+
+        for source in traverse_obj(response, ('sources', lambda _, v: v['src'])):
+            if source.get('key_systems'):
+                drm_formats = True
+                continue
+
+            src_url = source['src']
+            is_live = live_status == 'is_live'
+            ext = mimetype2ext(source.get('type'))
+
+            if ext == 'm3u8':
+                if is_live and ssai:
+                    session_params = traverse_obj(
+                        self._download_json(
+                            self._API_URL_TEMPLATE.format('ssai', project_id, streaks_id, '/ssai/session'),
+                            media_id, 'Downloading session parameters',
+                            headers={'Content-Type': 'application/json'},  # XXX: geo_verification_headers ?
+                            data=json.dumps({'id': source['id']}).encode()),
+                        (0, 'query', {urllib.parse.parse_qs}))
+                    src_url = update_url_query(src_url, session_params)
+
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    src_url, media_id, 'mp4', m3u8_id='hls',
+                    fatal=False, live=is_live, query=query)
+
+                for fmt in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none'):
+                    if mobj := re.match(r'hls-[a-z]+_AUDIO-(?P<quality>\d)_\d+-', fmt['format_id']):
+                        fmt['quality'] = audio_quality_func(mobj.group('quality'))
+
+            elif ext == 'mpd':
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    src_url, media_id, mpd_id='dash', fatal=False)
+
+            else:
+                self.report_warning(f'Unsupported stream type: {ext}')
+                continue
+
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        if not formats and drm_formats:
+            self.report_drm(media_id)
+        self._remove_duplicate_formats(formats)
+
+        for subs in traverse_obj(response, (
+            'tracks', lambda _, v: v['kind'] in ('subtitles', 'captions') and url_or_none(v['src']),
+        )):
+            lang = traverse_obj(subs, ('srclang', {str.lower})) or 'ja'
+            subtitles.setdefault(lang, []).append({'url': subs['src']})
+
+        return {
+            'id': streaks_id,
+            'display_id': media_id,
+            'channel_id': project_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'live_status': live_status,
+            **traverse_obj(response, {
+                'channel_id': ('project_id', {str}),
+                'uploader_id': ('profile', {str}),
+                'title': ('name', {str}),
+                'description': ('description', {str}, filter),
+                'duration': ('duration', {float_or_none}),
+                'tags': ('tags', ..., {str}),
+                'thumbnails': (('poster', 'thumbnail'), 'src', {'url': {url_or_none}}),
+                'timestamp': ('created_at', {parse_iso8601}),
+                'modified_timestamp': ('updated_at', {parse_iso8601}),
+            }),
+        }
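Note: in the new `_extract_from_streaks_api`, `qualities(('1', '0'))` replaces the old sort-then-rank dance around the audio rendition IDs. yt-dlp's `qualities` helper ranks later tuple entries higher, which encodes the observation from issue #12643 that the `0_X` audio renditions are the best. A quick check against the real helper:

```python
from yt_dlp.utils import qualities

audio_quality_func = qualities(('1', '0'))
# later entries rank higher, so '0' outranks '1'; unknown IDs rank below both
assert audio_quality_func('0') > audio_quality_func('1')
assert audio_quality_func('9') < audio_quality_func('1')
```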
