diff --git a/yt_dlp/extractor/streaks.py b/yt_dlp/extractor/streaks.py index e8494cf598..c7c854ed58 100644 --- a/yt_dlp/extractor/streaks.py +++ b/yt_dlp/extractor/streaks.py @@ -1,4 +1,5 @@ import json +import re import urllib.parse from .common import InfoExtractor @@ -7,9 +8,11 @@ from ..utils import ( ExtractorError, filter_dict, float_or_none, + join_nonempty, mimetype2ext, parse_iso8601, - str_or_none, + qualities, + unsmuggle_url, update_url_query, url_or_none, ) @@ -19,103 +22,118 @@ from ..utils.traversal import traverse_obj class StreaksBaseIE(InfoExtractor): _API_URL_TEMPLATE = 'https://{}.api.streaks.jp/v1/projects/{}/medias/{}{}' _GEO_COUNTRIES = ['JP'] + _GEO_BYPASS = False - def _parse_streaks_metadata(self, project_id, media_id, headers=None, query=None, ssai=False): + def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=None, ssai=False): try: - streaks = self._download_json( + response = self._download_json( self._API_URL_TEMPLATE.format('playback', project_id, media_id, ''), - media_id, headers=filter_dict({ - 'Content-Type': 'application/json', + media_id, 'Downloading streaks playback API JSON', + headers={ + 'Accept': 'application/json', 'Origin': 'https://players.streaks.jp', **self.geo_verification_headers(), - } | (headers or {})), - ) + **(headers or {}), + }) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}: - error = self._parse_json(e.cause.response.read().decode(), media_id) - message = error.get('message') - if error.get('code') == 'REQUEST_FAILED': - self.raise_geo_restricted(message, self._GEO_COUNTRIES) - elif error.get('code') == 'MEDIA_NOT_FOUND': + error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False) + message = traverse_obj(error, ('message', {str})) + code = traverse_obj(error, ('code', {str})) + if code == 'REQUEST_FAILED': + self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES) + elif code == 'MEDIA_NOT_FOUND': raise ExtractorError(message, expected=True) - raise ExtractorError(message) + elif code or message: + raise ExtractorError(join_nonempty(code, message, delim=': ')) raise + streaks_id = response['id'] live_status = { 'clip': 'was_live', 'file': 'not_live', 'linear': 'is_live', 'live': 'is_live', - }[streaks['type']] + }.get(response.get('type')) + audio_quality_func = qualities(('1', '0')) formats, subtitles = [], {} - for source in streaks.get('sources', []): + drm_formats = False + + for source in traverse_obj(response, ('sources', lambda _, v: v['src'])): + if source.get('key_systems'): + drm_formats = True + continue + + src_url = source['src'] + is_live = live_status == 'is_live' ext = mimetype2ext(source.get('type')) - has_drm = bool(source.get('key_systems')) - if src := source.get('src'): - if ext == 'm3u8': - if is_live := live_status == 'is_live' and ssai: - session = dict(traverse_obj(self._download_json( - self._API_URL_TEMPLATE.format( - 'ssai', project_id, streaks['id'], '/ssai/session'), - media_id, headers={ - 'Content-Type': 'application/json', - }, data=json.dumps({ - 'id': streaks['sources'][0]['id'], - }).encode(), - ), (0, 'query', {urllib.parse.parse_qsl}))) - src = update_url_query(src, session) - - fmts, subs = self._extract_m3u8_formats_and_subtitles( - src, media_id, 'mp4', m3u8_id='hls', - fatal=False, live=is_live, query=query or {}) - elif ext == 'mpd': - fmts, subs = self._extract_mpd_formats_and_subtitles( - src, media_id, mpd_id='dash', fatal=False) - else: - raise ExtractorError(f'Unsupported type: {ext}') - for n, f in enumerate(fmts): - if f.get('vcodec') == 'none': - f['quality'] = -n - if has_drm: - f['has_drm'] = True + + if ext == 'm3u8': + if is_live and ssai: + session_params = traverse_obj( + self._download_json( + self._API_URL_TEMPLATE.format('ssai', project_id, streaks_id, '/ssai/session'), + media_id, 'Downloading session parameters', + headers={'Content-Type': 'application/json'}, # XXX: geo_verification_headers ? + data=json.dumps({'id': source['id']}).encode()), + (0, 'query', {urllib.parse.parse_qs})) + src_url = update_url_query(src_url, session_params) + + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src_url, media_id, 'mp4', m3u8_id='hls', + fatal=False, live=is_live, query=query) + + for fmt in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none'): + if mobj := re.match(r'hls-[a-z]+_AUDIO-(?P\d)_\d+-', fmt['format_id']): + fmt['quality'] = audio_quality_func(mobj.group('quality')) + + elif ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles( + src_url, media_id, mpd_id='dash', fatal=False) + + else: + self.report_warning(f'Unsupported stream type: {ext}') + continue + formats.extend(fmts) - subtitles = self._merge_subtitles(subtitles, subs) + self._merge_subtitles(subs, target=subtitles) + + if not formats and drm_formats: + self.report_drm(media_id) + self._remove_duplicate_formats(formats) - if not formats: - self.raise_no_formats('This content is currently unavailable', True, media_id) - for track in streaks.get('tracks', []): - if track.get('kind') == 'subtitles' and (src := traverse_obj(track, ('src', {url_or_none}))): - lang = (track.get('srclang') or 'ja').lower() - subtitles.setdefault(lang, []).append({'url': src}) + for subs in traverse_obj(response, ( + 'tracks', lambda _, v: v['kind'] in ('subtitles', 'captions') and url_or_none(v['src']), + )): + lang = traverse_obj(subs, ('srclang', {str.lower})) or 'ja' + subtitles.setdefault(lang, []).append({'url': subs['src']}) return { + 'id': streaks_id, 'display_id': media_id, + 'channel_id': project_id, 'formats': formats, - 'live_status': live_status, 'subtitles': subtitles, - **traverse_obj(streaks, { - 'id': (('id', ('ref_id', {lambda x: f'ref:{x}'})), {str_or_none}, filter, any), + 'live_status': live_status, + **traverse_obj(response, { + 'channel_id': ('project_id', {str}), + 'uploader_id': ('profile', {str}), 'title': ('name', {str}), - 'channel_id': ('channel_id', {str_or_none}), 'description': ('description', {str}, filter), 'duration': ('duration', {float_or_none}), - 'episode_id': ('program_id', {str_or_none}), 'tags': ('tags', ..., {str}), 'thumbnails': (('poster', 'thumbnail'), 'src', {'url': {url_or_none}}), - 'timestamp': ('updated_at', {parse_iso8601}), - 'uploader': ('project_id', {str_or_none}), + 'timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), }), } class StreaksIE(StreaksBaseIE): - IE_NAME = 'streaks' - IE_DESC = 'STREAKS' - _VALID_URL = [ - r'https?://players\.streaks\.jp/(?P[\w-]+)/(?P\w+)/index\.html\?m=(?P(?:ref:)?[\w-]+)', + r'https?://players\.streaks\.jp/(?P[\w-]+)/[\da-f]+/index\.html\?(?:[^#]+&)?m=(?P(?:ref:)?[\w-]+)', r'https?://playback\.api\.streaks\.jp/v1/projects/(?P[\w-]+)/medias/(?P(?:ref:)?[\w-]+)', ] _TESTS = [{ @@ -129,7 +147,7 @@ class StreaksIE(StreaksBaseIE): 'live_status': 'not_live', 'timestamp': 1690356180, 'upload_date': '20230726', - 'uploader': 'tipness', + 'channel_id': 'tipness', 'uploader_id': '08155cd19dc14c12bebefb69b92eafcc', }, }, { @@ -144,24 +162,9 @@ class StreaksIE(StreaksBaseIE): 'thumbnail': r're:https?://.+\.jpg', 'timestamp': 1741586302, 'upload_date': '20250310', - 'uploader': 'ktv-web', + 'channel_id': 'ktv-web', 'uploader_id': '0298e8964c164ab384c07ef6e08c444b', }, - }, { - 'url': 'https://players.streaks.jp/sp-jbc/a12d7ee0f40c49d6a0a2bff520639677/index.html?m=5f89c62f37ee4a68be8e6e3b1396c7d8', - 'info_dict': { - 'id': '5f89c62f37ee4a68be8e6e3b1396c7d8', - 'ext': 'mp4', - 'title': '30715小田井涼平のあい旅#58.mp4', - 'display_id': '5f89c62f37ee4a68be8e6e3b1396c7d8', - 'duration': 3420.017, - 'live_status': 'not_live', - 'timestamp': 1710741433, - 'upload_date': '20240318', - 'uploader': 'sp-jbc', - 'uploader_id': 'a12d7ee0f40c49d6a0a2bff520639677', - }, - 'skip': 'DRM Protected', }, { 'url': 'https://playback.api.streaks.jp/v1/projects/ktv-web/medias/b5411938e1e5435dac71edf829dd4813', 'info_dict': { @@ -172,7 +175,7 @@ class StreaksIE(StreaksBaseIE): 'live_status': 'not_live', 'thumbnail': r're:https?://.+\.jpg', 'timestamp': 1737522999, - 'uploader': 'ktv-web', + 'channel_id': 'ktv-web', 'upload_date': '20250122', }, }, { @@ -186,7 +189,7 @@ class StreaksIE(StreaksBaseIE): 'duration': 12960.0, 'live_status': 'was_live', 'timestamp': 1722896263, - 'uploader': 'tver-olympic', + 'channel_id': 'tver-olympic', 'upload_date': '20240805', }, }, { @@ -199,19 +202,24 @@ class StreaksIE(StreaksBaseIE): 'display_id': 'ref:simul-02', 'live_status': 'is_live', 'timestamp': 1730339858, - 'uploader': 'tbs', + 'channel_id': 'tbs', 'upload_date': '20241031', }, + }, { + # DRM protected + 'url': 'https://players.streaks.jp/sp-jbc/a12d7ee0f40c49d6a0a2bff520639677/index.html?m=5f89c62f37ee4a68be8e6e3b1396c7d8', + 'only_matching': True, }] def _real_extract(self, url): - match = self._match_valid_url(url).groupdict() - project_id, uploader_id, media_id = ( - match.get(k) for k in ('project_id', 'uploader_id', 'media_id')) + url, smuggled_data = unsmuggle_url(url, {}) + project_id, media_id = self._match_valid_url(url).group('project_id', 'media_id') - return { - **self._parse_streaks_metadata(project_id, media_id, headers={ - 'X-Streaks-Api-Key': self._configuration_arg('x_streaks_api_key', [None])[0], - }), - 'uploader_id': uploader_id if uploader_id else None, - } + return self._extract_from_streaks_api(project_id, media_id, headers=filter_dict({ + 'X-Streaks-Api-Key': self._configuration_arg( + 'api_key', [smuggled_data.get('api_key')], casesense=True)[0], + **traverse_obj(smuggled_data, { + 'Origin': 'Origin', + 'Referer': 'Referer', + }, casesense=False), + }))