From e0ce6eed9210b1f031d1f43a182e16837eec7f75 Mon Sep 17 00:00:00 2001 From: Peter Rowlands <peter@pmrowla.com> Date: Sat, 5 Oct 2024 14:50:13 +0900 Subject: [PATCH] [extractor] Parse DASH-SEA content protection in DASH manifests --- test/test_InfoExtractor.py | 65 ++++++++++++++++++++ test/testdata/mpd/dash_sea.mpd | 109 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 55 +++++++++++------ 3 files changed, 211 insertions(+), 18 deletions(-) create mode 100644 test/testdata/mpd/dash_sea.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 3fee480f39..a5738ff8f9 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1473,6 +1473,71 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'has_drm': True, }], {}, + ), ( + # DASH SEA with AES-128-CBC + 'dash_sea', + 'https://unknown/manifest.mpd', # mpd_url + 'https://unknown/', # mpd_base_url + [{ + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '5_A_aac_eng_2_127999_2_1_1', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 127.999, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1_V_video_3', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.64001F', + 'tbr': 258.591, + 'width': 960, + 'height': 540, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1_V_video_2', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 
'none', + 'vcodec': 'avc1.64001F', + 'tbr': 422.519, + 'width': 1280, + 'height': 720, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }, { + 'manifest_url': 'https://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1_V_video_1', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640028', + 'tbr': 628.102, + 'width': 1920, + 'height': 1080, + 'hls_aes': { + 'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012', + 'iv': '0x7BD31E102B0CE9CCD39691782533656C', + }, + }], + {}, ), ] diff --git a/test/testdata/mpd/dash_sea.mpd b/test/testdata/mpd/dash_sea.mpd new file mode 100644 index 0000000000..0eeb9798d6 --- /dev/null +++ b/test/testdata/mpd/dash_sea.mpd @@ -0,0 +1,109 @@ +<?xml version="1.0" encoding="utf-8"?> +<MPD + xmlns="urn:mpeg:dash:schema:mpd:2011" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" profiles="urn:mpeg:dash:profile:isoff-live:2011" type="static" + xmlns:sea="urn:mpeg:dash:schema:sea:2012" mediaPresentationDuration="PT3M32.949S" minBufferTime="PT3S"> + <Period> + <AdaptationSet id="1" group="5" profiles="ccff" bitstreamSwitching="false" segmentAlignment="true" contentType="audio" mimeType="audio/mp4" codecs="mp4a.40.2" lang="en"> + <ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012"> + <sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/> + <sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/> + <sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/> + </ContentProtection> + <Label>aac_eng_2_127999_2_1</Label> + <SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=$Time$,format=mpd-time-csf)" 
initialization="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=i,format=mpd-time-csf)"> + <SegmentTimeline> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333" r="1"/> + <S d="20053334"/> + <S d="20053333"/> + <S d="3840000"/> + </SegmentTimeline> + </SegmentTemplate> + <Representation id="5_A_aac_eng_2_127999_2_1_1" bandwidth="127999" audioSamplingRate="48000"/> + </AdaptationSet> + <AdaptationSet id="2" group="1" profiles="ccff" 
bitstreamSwitching="false" segmentAlignment="true" contentType="video" mimeType="video/mp4" codecs="avc1.640028" maxWidth="1920" maxHeight="1080" startWithSAP="1"> + <ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012"> + <sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/> + <sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/> + <sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/> + </ContentProtection> + <SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(video=$Time$,format=mpd-time-csf)" initialization="QualityLevels($Bandwidth$)/Fragments(video=i,format=mpd-time-csf)"> + <SegmentTimeline> + <S d="20000000" r="105"/> + <S d="8666666"/> + </SegmentTimeline> + </SegmentTemplate> + <Representation id="1_V_video_1" bandwidth="628102" width="1920" height="1080"/> + <Representation id="1_V_video_2" bandwidth="422519" codecs="avc1.64001F" width="1280" height="720"/> + <Representation id="1_V_video_3" bandwidth="258591" codecs="avc1.64001F" width="960" height="540"/> + </AdaptationSet> + </Period> +</MPD> diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1069686a95..90b6641810 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -248,7 +248,9 @@ class InfoExtractor: * hls_aes A dictionary of HLS AES-128 decryption information used by the native HLS downloader to override the values in the media playlist when an '#EXT-X-KEY' tag - is present in the playlist: + is present in the playlist. Also used by the native DASH downloader + when DASH-SEA with AES-128-CBC content protection is present + in the manifest: * uri The URI from which the key will be downloaded * key The key (as hex) used to decrypt fragments.
If `key` is given, any key URI will be ignored @@ -261,7 +263,8 @@ class InfoExtractor: * is_dash_periods Whether the format is a result of merging multiple DASH periods. * dash_cenc A dictionary of DASH CENC decryption information - used by the native DASH downloader when set. + used by the native DASH downloader when MPEG CENC content protection + is present in the manifest. * laurl The Clear Key license server URL from which CENC keys will be downloaded. * key_ids List of key IDs (as hex) to request from the ClearKey @@ -2680,10 +2683,11 @@ class InfoExtractor: assert 'is_dash_periods' not in f, 'format already processed' f['is_dash_periods'] = True format_key = tuple(v for k, v in f.items() if k not in ( - ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc'))) - if 'dash_cenc' in f: - format_key = format_key + tuple( - tuple(v) if isinstance(v, list) else v for v in f['dash_cenc'].values()) + ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc', 'hls_aes'))) + for k in ('dash_cenc', 'hls_aes'): + if k in f: + format_key = format_key + tuple( + tuple(v) if isinstance(v, list) else v for v in f[k].values()) if format_key not in formats: formats[format_key] = f elif 'fragments' in f: @@ -2718,15 +2722,13 @@ class InfoExtractor: return self._xpath_ns(path, namespace) def extract_drm_info(element): + info = {} has_drm = False - cenc_info = {} for cp_e in element.findall(_add_ns('ContentProtection')): has_drm = True - self._extract_mpd_content_protection_info(cp_e, cenc_info) - info = {'dash_cenc': cenc_info} if cenc_info else {} - if has_drm and not ( - cenc_info.get('key') or cenc_info.get('laurl') and cenc_info.get('key_ids') - ): + self._extract_mpd_content_protection_info(cp_e, info) + cenc_info = info.get('dash_cenc', {}) + if has_drm and not ('hls_aes' in info or cenc_info.get('key') or (cenc_info.get('laurl') and cenc_info.get('key_ids'))): info['has_drm'] = True return info @@ -3051,7 +3053,7 @@ class InfoExtractor: 
period_entry['subtitles'][lang or 'und'].append(f) yield period_entry - def _extract_mpd_content_protection_info(self, cp_e, cenc_info): + def _extract_mpd_content_protection_info(self, cp_e, info): """ Extract supported DASH-CENC parameters for an MPD ContentProtection element. @@ -3061,13 +3063,16 @@ class InfoExtractor: from the manifest or when an extractor needs to process the optional data section in W3C PSSH boxes). - Note that the `has_drm` flag will be set for any format that does not meet one or more - of these conditions: + Note that after all ContentProtection elements have been handled, the `has_drm` flag + will be set for any format that does not meet at least one of these conditions: - * Both `laurl` and `key_ids` are set (indicating the native DASH downloader should - use the specified Clear Key server URL to retreive the CENC key for this format. - * `key_id` is set (indicating the native DASH downloader should use the specified + * `dash_cenc` is set and both `laurl` and `key_ids` are set (indicating the native + DASH downloader should use the specified Clear Key server URL to retrieve the CENC key for this format). + * `dash_cenc` is set and `key` is set (indicating the native DASH downloader should + use the specified CENC key for this format). + * `hls_aes` is set (indicating the native DASH downloader should use DASH SEA + AES-128-CBC decryption for this format). References: 1.
DASH-IF Content Protection Identifiers @@ -3078,6 +3083,7 @@ class InfoExtractor: https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html """ scheme_id = cp_e.get('schemeIdUri') + cenc_info = info.get('dash_cenc', {}) if scheme_id == 'urn:mpeg:dash:mp4protection:2011': if cp_e.get('value') == 'cenc': # ISO/IEC 23009-1 MPEG Common Encryption (CENC) @@ -3113,6 +3119,19 @@ class InfoExtractor: cenc_info['key_ids'] = kids except (ValueError, TypeError, struct.error): pass + elif scheme_id == 'urn:mpeg:dash:sea:2012': + # ISO/IEC 23009-4 DASH Segment Encryption and Authentication (AES-128-CBC) + sea_ns = 'urn:mpeg:dash:schema:sea:2012' + se_e = cp_e.find(self._xpath_ns('SegmentEncryption', sea_ns)) + ks_e = cp_e.find(self._xpath_ns('KeySystem', sea_ns)) + crypto_e = cp_e.find(self._xpath_ns('CryptoPeriod', sea_ns)) + if (se_e is not None and se_e.get('schemeIdUri') == 'urn:mpeg:dash:sea:aes128-cbc:2013' + and ks_e is not None and ks_e.get('keySystemUri') == 'urn:mpeg:dash:sea:keysys:http:2013' + and crypto_e is not None and crypto_e.get('keyUriTemplate') and crypto_e.get('IV') + ): + info['hls_aes'] = {'uri': crypto_e.get('keyUriTemplate'), 'iv': crypto_e.get('IV')} + if cenc_info: + info['dash_cenc'] = cenc_info def _extract_ism_formats(self, *args, **kwargs): fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)