From a95757d3b7ca2741aeca4d5aea8e1792f361d5b4 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 4 Oct 2024 18:04:52 +0900 Subject: [PATCH 1/4] [extractor] parse CENC + Clear Key information in DASH manifests --- test/test_InfoExtractor.py | 104 ++++++++++++++++++++++++++++ test/testdata/mpd/clearkey_cenc.mpd | 29 ++++++++ test/testdata/mpd/w3c_pssh.mpd | 13 ++++ yt_dlp/extractor/common.py | 98 ++++++++++++++++++++++++-- 4 files changed, 239 insertions(+), 5 deletions(-) create mode 100644 test/testdata/mpd/clearkey_cenc.mpd create mode 100644 test/testdata/mpd/w3c_pssh.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 31e8f82448..3fee480f39 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1369,6 +1369,110 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }, ], }, + ), ( + # Clear Key with CENC default_KID + 'clearkey_cenc', + 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', # mpd_url + 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/', # mpd_base_url + [{ + 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', + 'ext': 'mp4', + 'format_id': '1', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001f', + 'tbr': 389.802, + 'width': 512, + 'height': 288, + 'dash_cenc': { + 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense', + 'key_ids': ['9eb4050de44b4802932e27d75083e266'], + }, + }, { + 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', + 'ext': 'mp4', + 'format_id': '2', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001f', + 'tbr': 764.935, + 'width': 640, + 'height': 360, + 'dash_cenc': { + 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense', + 'key_ids': 
['9eb4050de44b4802932e27d75083e266'], + }, + }, { + 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', + 'ext': 'mp4', + 'format_id': '3', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640028', + 'tbr': 1120.439, + 'width': 852, + 'height': 480, + 'dash_cenc': { + 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense', + 'key_ids': ['9eb4050de44b4802932e27d75083e266'], + }, + }, { + 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', + 'ext': 'mp4', + 'format_id': '4', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 1945.258, + 'width': 1280, + 'height': 720, + 'dash_cenc': { + 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense', + 'key_ids': ['9eb4050de44b4802932e27d75083e266'], + }, + }, { + 'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', + 'ext': 'mp4', + 'format_id': '5', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640033', + 'tbr': 2726.377, + 'width': 1920, + 'height': 1080, + 'dash_cenc': { + 'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense', + 'key_ids': ['9eb4050de44b4802932e27d75083e266'], + }, + }], + {}, + ), ( + # default CENC KID overridden via W3C PSSH box, no license server in manifest + 'w3c_pssh', + 'https://unknown/manifest.mpd', # mpd_url + 'https://unknown/', # mpd_base_url + [{ + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001f', + 'tbr': 389.802, + 'width': 512, + 'height': 288, + 'dash_cenc': { + 'key_ids': ['43215678123412341234123412341234'], + }, + 'has_drm': True, + }], + {}, ), ] diff 
--git a/test/testdata/mpd/clearkey_cenc.mpd b/test/testdata/mpd/clearkey_cenc.mpd new file mode 100644 index 0000000000..40f2123836 --- /dev/null +++ b/test/testdata/mpd/clearkey_cenc.mpd @@ -0,0 +1,29 @@ + + + + + + + + https://drm-clearkey-testvectors.axtest.net/AcquireLicense + + + + + + + + + + + diff --git a/test/testdata/mpd/w3c_pssh.mpd b/test/testdata/mpd/w3c_pssh.mpd new file mode 100644 index 0000000000..d72cd866cd --- /dev/null +++ b/test/testdata/mpd/w3c_pssh.mpd @@ -0,0 +1,13 @@ + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAFDIVZ4EjQSNBI0EjQSNBI0AAAAAA== + + + + + + + diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3430036f4b..1069686a95 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -14,12 +14,14 @@ import netrc import os import random import re +import struct import subprocess import sys import time import types import urllib.parse import urllib.request +import uuid import xml.etree.ElementTree from ..compat import ( @@ -258,6 +260,15 @@ class InfoExtractor: * ffmpeg_args_out Extra arguments for ffmpeg downloader (output) * is_dash_periods Whether the format is a result of merging multiple DASH periods. + * dash_cenc A dictionary of DASH CENC decryption information + used by the native DASH downloader when set. + * laurl The Clear Key license server URL from which + CENC keys will be downloaded. + * key_ids List of key IDs (as hex) to request from the ClearKey + license server. + * key The CENC key (as hex) used to decrypt fragments. + If `key` is given, any license server URL and + key IDs will be ignored. 
RTMP formats can also have the additional fields: page_url, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, rtmp_protocol, rtmp_real_time @@ -2669,7 +2680,10 @@ class InfoExtractor: assert 'is_dash_periods' not in f, 'format already processed' f['is_dash_periods'] = True format_key = tuple(v for k, v in f.items() if k not in ( - ('format_id', 'fragments', 'manifest_stream_number'))) + ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc'))) + if 'dash_cenc' in f: + format_key = format_key + tuple( + tuple(v) if isinstance(v, list) else v for v in f['dash_cenc'].values()) if format_key not in formats: formats[format_key] = f elif 'fragments' in f: @@ -2703,8 +2717,18 @@ class InfoExtractor: def _add_ns(path): return self._xpath_ns(path, namespace) - def is_drm_protected(element): - return element.find(_add_ns('ContentProtection')) is not None + def extract_drm_info(element): + has_drm = False + cenc_info = {} + for cp_e in element.findall(_add_ns('ContentProtection')): + has_drm = True + self._extract_mpd_content_protection_info(cp_e, cenc_info) + info = {'dash_cenc': cenc_info} if cenc_info else {} + if has_drm and not ( + cenc_info.get('key') or cenc_info.get('laurl') and cenc_info.get('key_ids') + ): + info['has_drm'] = True + return info def extract_multisegment_info(element, ms_parent_info): ms_info = ms_parent_info.copy() @@ -2778,6 +2802,7 @@ class InfoExtractor: 'timescale': 1, }) for adaptation_set in period.findall(_add_ns('AdaptationSet')): + adaptation_set_drm_info = extract_drm_info(adaptation_set) adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) for representation in adaptation_set.findall(_add_ns('Representation')): representation_attrib = adaptation_set.attrib.copy() @@ -2864,8 +2889,8 @@ class InfoExtractor: 'acodec': 'none', 'vcodec': 'none', } - if is_drm_protected(adaptation_set) or is_drm_protected(representation): - f['has_drm'] = True + f.update(adaptation_set_drm_info) + 
f.update(extract_drm_info(representation)) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) def prepare_template(template_name, identifiers): @@ -3026,6 +3051,69 @@ class InfoExtractor: period_entry['subtitles'][lang or 'und'].append(f) yield period_entry + def _extract_mpd_content_protection_info(self, cp_e, cenc_info): + """ + Extract supported DASH-CENC parameters for an MPD ContentProtection element. + + Called multiple times per extracted format in an MPD (once per ContentProtection element + within AdaptationSet and Representation elements). Subclasses may override this method + when necessary (such as when the Clear Key license server URL is provided separately + from the manifest or when an extractor needs to process the optional data section in W3C + PSSH boxes). + + Note that the `has_drm` flag will be set for any format that does not meet one or more + of these conditions: + + * Both `laurl` and `key_ids` are set (indicating the native DASH downloader should + use the specified Clear Key server URL to retreive the CENC key for this format. + * `key_id` is set (indicating the native DASH downloader should use the specified + CENC key for this format). + + References: + 1. DASH-IF Content Protection Identifiers + https://dashif.org/identifiers/content_protection/ + 2. DASH-IF Content Protection Guidelines + https://dashif.org/docs/IOP-Guidelines/DASH-IF-IOP-Part6-v5.0.0.pdf + 3. 
W3C "cenc" Initialization Data Format + https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html + """ + scheme_id = cp_e.get('schemeIdUri') + if scheme_id == 'urn:mpeg:dash:mp4protection:2011': + if cp_e.get('value') == 'cenc': + # ISO/IEC 23009-1 MPEG Common Encryption (CENC) + if not cenc_info.get('key_ids'): + try: + default_kid = uuid.UUID(cp_e.get('{urn:mpeg:cenc:2013}default_KID')).hex + cenc_info['key_ids'] = [default_kid] + except (ValueError, TypeError): + pass + elif scheme_id == 'urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e': + # Clear Key DASH-IF + for tag, ns in itertools.product( + ('Laurl', 'laurl'), + ('https://dashif.org/CPS', 'http://dashif.org/guidelines/clearKey'), + ): + url_e = cp_e.find(self._xpath_ns(tag, ns)) + if url_e is not None: + cenc_info['laurl'] = url_e.text + break + elif scheme_id == 'urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b': + # W3C Common System ID + pssh_e = cp_e.find(self._xpath_ns('pssh', 'urn:mpeg:cenc:2013')) + if pssh_e is not None: + # W3C PSSH box (may contain Clear Key KIDs but can also be used + # to store KIDs for other DRM systems) + try: + pssh_box = base64.b64decode(pssh_e.text) + kid_count, = struct.unpack('!L', pssh_box[28:32]) + kids = [] + for i in range(kid_count): + kid = pssh_box[32 + i * 16:32 + (i + 1) * 16] + kids.append(kid.hex()) + cenc_info['key_ids'] = kids + except (ValueError, TypeError, struct.error): + pass + def _extract_ism_formats(self, *args, **kwargs): fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) if subs: From 6b0ce3193961dafbd2ac8eb9a9d1df062b2aa03a Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sat, 5 Oct 2024 00:59:58 +0900 Subject: [PATCH 2/4] [fd/dash, pp/ffmpeg] support DASH CENC decryption --- yt_dlp/YoutubeDL.py | 10 +++++++ yt_dlp/downloader/dash.py | 46 ++++++++++++++++++++++++++++++++ yt_dlp/postprocessor/__init__.py | 1 + yt_dlp/postprocessor/ffmpeg.py | 26 +++++++++++++++--- 4 files changed, 79 insertions(+), 4 deletions(-) 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4f45d7faf6..0e86fd7bcf 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -48,6 +48,7 @@ from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, + FFmpegCENCDecryptPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, @@ -3384,6 +3385,8 @@ class YoutubeDL: self.report_error(f'{msg}. Aborting') return + decrypter = FFmpegCENCDecryptPP(self) + info_dict.setdefault('__files_to_cenc_decrypt', []) if info_dict.get('requested_formats') is not None: old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: @@ -3464,8 +3467,12 @@ class YoutubeDL: downloaded.append(fname) partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download + if new_info.get('dash_cenc', {}).get('key'): + info_dict['__files_to_cenc_decrypt'].append((fname, new_info['dash_cenc']['key'])) success = success and partial_success + if downloaded and info_dict['__files_to_cenc_decrypt'] and decrypter.available: + info_dict['__postprocessors'].append(decrypter) if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): info_dict['__postprocessors'].append(merger) info_dict['__files_to_merge'] = downloaded @@ -3482,6 +3489,9 @@ class YoutubeDL: # So we should try to resume the download success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download + if info_dict.get('dash_cenc', {}).get('key') and decrypter.available: + info_dict['__postprocessors'].append(decrypter) + info_dict['__files_to_cenc_decrypt'] = [(temp_filename, info_dict['dash_cenc']['key'])] else: self.report_file_already_downloaded(dl_filename) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index afc79b6caf..84ff79c8af 100644 --- a/yt_dlp/downloader/dash.py +++ 
b/yt_dlp/downloader/dash.py @@ -1,8 +1,13 @@ +import base64 +import binascii +import json import time import urllib.parse from . import get_suitable_downloader from .fragment import FragmentFD +from ..networking import Request +from ..networking.exceptions import RequestError from ..utils import update_url_query, urljoin @@ -60,6 +65,9 @@ class DashSegmentsFD(FragmentFD): args.append([ctx, fragments_to_download, fmt]) + if 'dash_cenc' in info_dict and not info_dict['dash_cenc'].get('key'): + self._get_clearkey_cenc(info_dict) + return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) def _resolve_fragments(self, fragments, ctx): @@ -88,3 +96,41 @@ class DashSegmentsFD(FragmentFD): 'index': i, 'url': fragment_url, } + + def _get_clearkey_cenc(self, info_dict): + dash_cenc = info_dict.get('dash_cenc', {}) + laurl = dash_cenc.get('laurl') + if not laurl: + self.report_error('No Clear Key license server URL for encrypted DASH stream') + return + key_ids = dash_cenc.get('key_ids') + if not key_ids: + self.report_error('No requested CENC KIDs for encrypted DASH stream') + return + payload = json.dumps({ + 'kids': [ + base64.urlsafe_b64encode(bytes.fromhex(k)).decode().rstrip('=') + for k in key_ids + ], + 'type': 'temporary', + }).encode() + try: + response = self.ydl.urlopen(Request( + laurl, data=payload, headers={'Content-Type': 'application/json'})) + data = json.loads(response.read()) + except (RequestError, json.JSONDecodeError) as err: + self.report_error(f'Failed to retrieve key from Clear Key license server: {err}') + return + keys = data.get('keys', []) + if len(keys) > 1: + self.report_warning('Clear Key license server returned multiple keys but only single key CENC is supported') + for key in keys: + k = key.get('k') + if k: + try: + dash_cenc['key'] = base64.urlsafe_b64decode(f'{k}==').hex() + info_dict['dash_cenc'] = dash_cenc + return + except (ValueError, binascii.Error): + pass + self.report_error('Clear key license server 
did not return any valid CENC keys') diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 164540b5db..8673724065 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -8,6 +8,7 @@ from .ffmpeg import ( FFmpegCopyStreamPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, + FFmpegCENCDecryptPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 164c46d143..6670a3d418 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -331,7 +331,7 @@ class FFmpegPostProcessor(PostProcessor): [(path, []) for path in input_paths], [(out_path, opts)], **kwargs) - def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)): + def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, prepend_opts=None, expected_retcodes=(0,)): self.check_version() oldest_mtime = min( @@ -342,6 +342,9 @@ class FFmpegPostProcessor(PostProcessor): if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + if prepend_opts: + cmd += prepend_opts + def make_args(file, args, name, number): keys = [f'_{name}{number}', f'_{name}'] if name == 'o': @@ -857,12 +860,23 @@ class FFmpegMergerPP(FFmpegPostProcessor): return True +class FFmpegCENCDecryptPP(FFmpegPostProcessor): + @PostProcessor._restrict_to(images=False) + def run(self, info): + for filename, key in info.get('__files_to_cenc_decrypt', []): + temp_filename = prepend_extension(filename, 'temp') + self.to_screen(f'Decrypting "{filename}"') + self.run_ffmpeg(filename, temp_filename, self.stream_copy_opts(), prepend_opts=['-decryption_key', key]) + os.replace(temp_filename, filename) + return [], info + + class FFmpegFixupPostProcessor(FFmpegPostProcessor): - def _fixup(self, msg, filename, options): + def _fixup(self, msg, filename, options, prepend_opts=None): temp_filename = 
prepend_extension(filename, 'temp') self.to_screen(f'{msg} of "{filename}"') - self.run_ffmpeg(filename, temp_filename, options) + self.run_ffmpeg(filename, temp_filename, options, prepend_opts=prepend_opts) os.replace(temp_filename, filename) @@ -934,7 +948,11 @@ class FFmpegCopyStreamPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): - self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts()) + self._fixup( + self.MESSAGE, + info['filepath'], + self.stream_copy_opts(), + ) return [], info From e0ce6eed9210b1f031d1f43a182e16837eec7f75 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sat, 5 Oct 2024 14:50:13 +0900 Subject: [PATCH 3/4] [extractor] Parse DASH-SEA content protection in DASH manifests --- test/test_InfoExtractor.py | 65 ++++++++++++++++++++ test/testdata/mpd/dash_sea.mpd | 109 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 55 +++++++++++------ 3 files changed, 211 insertions(+), 18 deletions(-) create mode 100644 test/testdata/mpd/dash_sea.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 3fee480f39..a5738ff8f9 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1473,6 +1473,71 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'has_drm': True, }], {}, + ), ( + # DASH SEA with AES-128-CBC + 'dash_sea', + 'https://unknown/manifest.mpd', # mpd_url + 'https://unknown/', # mpd_base_url + [{ + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '5_A_aac_eng_2_127999_2_1_1', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 127.999, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 
'1_V_video_3', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001F', + 'tbr': 258.591, + 'width': 960, + 'height': 540, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1_V_video_2', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001F', + 'tbr': 422.519, + 'width': 1280, + 'height': 720, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1_V_video_1', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640028', + 'tbr': 628.102, + 'width': 1920, + 'height': 1080, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }], + {}, ), ] diff --git a/test/testdata/mpd/dash_sea.mpd b/test/testdata/mpd/dash_sea.mpd new file mode 100644 index 0000000000..0eeb9798d6 --- /dev/null +++ b/test/testdata/mpd/dash_sea.mpd @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1069686a95..90b6641810 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -248,7 +248,9 @@ class InfoExtractor: * hls_aes A dictionary of HLS AES-128 decryption information used by the native HLS downloader to override 
the values in the media playlist when an '#EXT-X-KEY' tag - is present in the playlist: + is present in the playlist. Used by the native DASH downloader + when DASH-SEA with AES-128-CBC content protection is present + in the manifest: * uri The URI from which the key will be downloaded * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored @@ -261,7 +263,8 @@ class InfoExtractor: * is_dash_periods Whether the format is a result of merging multiple DASH periods. * dash_cenc A dictionary of DASH CENC decryption information - used by the native DASH downloader when set. + used by the native DASH downloader when MPEG CENC content protection + is present in the manifest. * laurl The Clear Key license server URL from which CENC keys will be downloaded. * key_ids List of key IDs (as hex) to request from the ClearKey @@ -2680,10 +2683,11 @@ class InfoExtractor: assert 'is_dash_periods' not in f, 'format already processed' f['is_dash_periods'] = True format_key = tuple(v for k, v in f.items() if k not in ( - ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc'))) - if 'dash_cenc' in f: - format_key = format_key + tuple( - tuple(v) if isinstance(v, list) else v for v in f['dash_cenc'].values()) + ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc', 'hls_aes'))) + for k in ('dash_cenc', 'hls_aes'): + if k in f: + format_key = format_key + tuple( + tuple(v) if isinstance(v, list) else v for v in f[k].values()) if format_key not in formats: formats[format_key] = f elif 'fragments' in f: @@ -2718,15 +2722,13 @@ class InfoExtractor: return self._xpath_ns(path, namespace) def extract_drm_info(element): + info = {} has_drm = False - cenc_info = {} for cp_e in element.findall(_add_ns('ContentProtection')): has_drm = True - self._extract_mpd_content_protection_info(cp_e, cenc_info) - info = {'dash_cenc': cenc_info} if cenc_info else {} - if has_drm and not ( - cenc_info.get('key') or cenc_info.get('laurl') and 
cenc_info.get('key_ids') - ): + self._extract_mpd_content_protection_info(cp_e, info) + cenc_info = info.get('dash_cenc', {}) + if has_drm and not ('hls_aes' in info or cenc_info.get('key') or (cenc_info.get('laurl') and cenc_info.get('key_ids'))): info['has_drm'] = True return info @@ -3051,7 +3053,7 @@ class InfoExtractor: period_entry['subtitles'][lang or 'und'].append(f) yield period_entry - def _extract_mpd_content_protection_info(self, cp_e, cenc_info): + def _extract_mpd_content_protection_info(self, cp_e, info): """ Extract supported DASH-CENC parameters for an MPD ContentProtection element. @@ -3061,13 +3063,16 @@ class InfoExtractor: from the manifest or when an extractor needs to process the optional data section in W3C PSSH boxes). - Note that the `has_drm` flag will be set for any format that does not meet one or more - of these conditions: + Note that after all ContentProtection elements have been handled, the `has_drm` flag + will be set for any format that meets none of these conditions: - * Both `laurl` and `key_ids` are set (indicating the native DASH downloader should - use the specified Clear Key server URL to retreive the CENC key for this format. - * `key_id` is set (indicating the native DASH downloader should use the specified + * `dash_cenc` is set and both `laurl` and `key_ids` are set (indicating the native + DASH downloader should use the specified Clear Key server URL to retrieve the CENC key for this format). + * `dash_cenc` is set and `key` is set (indicating the native DASH downloader should + use the specified CENC key for this format). + * `hls_aes` is set (indicating the native DASH downloader should use DASH SEA + AES-128-CBC decryption for this format). References: 1. 
DASH-IF Content Protection Identifiers @@ -3078,6 +3083,7 @@ class InfoExtractor: https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html """ scheme_id = cp_e.get('schemeIdUri') + cenc_info = info.get('dash_cenc', {}) if scheme_id == 'urn:mpeg:dash:mp4protection:2011': if cp_e.get('value') == 'cenc': # ISO/IEC 23009-1 MPEG Common Encryption (CENC) @@ -3113,6 +3119,19 @@ class InfoExtractor: cenc_info['key_ids'] = kids except (ValueError, TypeError, struct.error): pass + elif scheme_id == 'urn:mpeg:dash:sea:2012': + # ISO/IEC 23009-4 DASH Segment Encryption and Authentication (AES-128-CBC) + sea_ns = 'urn:mpeg:dash:schema:sea:2012' + se_e = cp_e.find(self._xpath_ns('SegmentEncryption', sea_ns)) + ks_e = cp_e.find(self._xpath_ns('KeySystem', sea_ns)) + crypto_e = cp_e.find(self._xpath_ns('CryptoPeriod', sea_ns)) + if (se_e is not None and se_e.get('schemeIdUri') == 'urn:mpeg:dash:sea:aes128-cbc:2013' + and ks_e is not None and ks_e.get('keySystemUri') == 'urn:mpeg:dash:sea:keysys:http:2013' + and crypto_e is not None and crypto_e.get('keyUriTemplate') and crypto_e.get('IV') + ): + info['hls_aes'] = {'uri': crypto_e.get('keyUriTemplate'), 'iv': crypto_e.get('IV')} + if cenc_info: + info['dash_cenc'] = cenc_info def _extract_ism_formats(self, *args, **kwargs): fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) From bd62cdba1ae49820964fb2d338df7fc6165cbbf9 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sat, 5 Oct 2024 15:43:32 +0900 Subject: [PATCH 4/4] [fd/dash] support DASH SEA (AES-128-CBC) decryption --- yt_dlp/downloader/dash.py | 41 ++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 84ff79c8af..f1826333a0 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -8,7 +8,7 @@ from . 
import get_suitable_downloader from .fragment import FragmentFD from ..networking import Request from ..networking.exceptions import RequestError -from ..utils import update_url_query, urljoin +from ..utils import remove_start, traverse_obj, update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -54,6 +54,25 @@ class DashSegmentsFD(FragmentFD): if extra_param_to_segment_url: extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + hls_aes = fmt.get('hls_aes', {}) + if hls_aes: + decrypt_info = {'METHOD': 'AES-128'} + key = hls_aes.get('key') + if key: + key = binascii.unhexlify(remove_start(key, '0x')) + assert len(key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' + decrypt_info['KEY'] = key + iv = hls_aes.get('iv') + if iv: + iv = binascii.unhexlify(remove_start(iv, '0x').zfill(32)) + decrypt_info['IV'] = iv + uri = hls_aes.get('uri') + if uri: + if extra_query: + uri = update_url_query(uri, extra_query) + decrypt_info['URI'] = uri + ctx['decrypt_info'] = decrypt_info + fragments_to_download = self._get_fragments(fmt, ctx, extra_query) if real_downloader: @@ -65,8 +84,11 @@ class DashSegmentsFD(FragmentFD): args.append([ctx, fragments_to_download, fmt]) - if 'dash_cenc' in info_dict and not info_dict['dash_cenc'].get('key'): - self._get_clearkey_cenc(info_dict) + cenc_key = traverse_obj(info_dict, ('dash_cenc', 'key')) + cenc_key_ids = traverse_obj(info_dict, ('dash_cenc', 'key_ids')) + clearkey_laurl = traverse_obj(info_dict, ('dash_cenc', 'laurl')) + if not cenc_key and cenc_key_ids and clearkey_laurl: + self._get_clearkey_cenc(info_dict, clearkey_laurl, cenc_key_ids) return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) @@ -95,18 +117,11 @@ class DashSegmentsFD(FragmentFD): 'fragment_count': fragment.get('fragment_count'), 'index': i, 'url': fragment_url, + 'decrypt_info': ctx.get('decrypt_info', {'METHOD': 'NONE'}), } - def _get_clearkey_cenc(self, info_dict): + def _get_clearkey_cenc(self, 
info_dict, laurl, key_ids): dash_cenc = info_dict.get('dash_cenc', {}) - laurl = dash_cenc.get('laurl') - if not laurl: - self.report_error('No Clear Key license server URL for encrypted DASH stream') - return - key_ids = dash_cenc.get('key_ids') - if not key_ids: - self.report_error('No requested CENC KIDs for encrypted DASH stream') - return payload = json.dumps({ 'kids': [ base64.urlsafe_b64encode(bytes.fromhex(k)).decode().rstrip('=') @@ -128,7 +143,7 @@ class DashSegmentsFD(FragmentFD): k = key.get('k') if k: try: - dash_cenc['key'] = base64.urlsafe_b64decode(f'{k}==').hex() + dash_cenc.update({'key': base64.urlsafe_b64decode(f'{k}==').hex()}) info_dict['dash_cenc'] = dash_cenc return except (ValueError, binascii.Error):