From e0ce6eed9210b1f031d1f43a182e16837eec7f75 Mon Sep 17 00:00:00 2001
From: Peter Rowlands <peter@pmrowla.com>
Date: Sat, 5 Oct 2024 14:50:13 +0900
Subject: [PATCH] [extractor] Parse DASH-SEA content protection in DASH
 manifests

---
 test/test_InfoExtractor.py     |  65 ++++++++++++++++++++
 test/testdata/mpd/dash_sea.mpd | 109 +++++++++++++++++++++++++++++++++
 yt_dlp/extractor/common.py     |  55 +++++++++++------
 3 files changed, 211 insertions(+), 18 deletions(-)
 create mode 100644 test/testdata/mpd/dash_sea.mpd

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 3fee480f39..a5738ff8f9 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1473,6 +1473,71 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'has_drm': True,
                 }],
                 {},
+            ), (
+                # DASH SEA with AES-128-CBC
+                'dash_sea',
+                'https://unknown/manifest.mpd',  # mpd_url
+                'https://unknown/',  # mpd_base_url
+                [{
+                    'manifest_url': 'https://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '5_A_aac_eng_2_127999_2_1_1',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 127.999,
+                    'hls_aes': {
+                        'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
+                        'iv': '0x7BD31E102B0CE9CCD39691782533656C',
+                    },
+                }, {
+                    'manifest_url': 'https://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1_V_video_3',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.64001F',
+                    'tbr': 258.591,
+                    'width': 960,
+                    'height': 540,
+                    'hls_aes': {
+                        'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
+                        'iv': '0x7BD31E102B0CE9CCD39691782533656C',
+                    },
+                }, {
+                    'manifest_url': 'https://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1_V_video_2',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.64001F',
+                    'tbr': 422.519,
+                    'width': 1280,
+                    'height': 720,
+                    'hls_aes': {
+                        'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
+                        'iv': '0x7BD31E102B0CE9CCD39691782533656C',
+                    },
+                }, {
+                    'manifest_url': 'https://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1_V_video_1',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640028',
+                    'tbr': 628.102,
+                    'width': 1920,
+                    'height': 1080,
+                    'hls_aes': {
+                        'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
+                        'iv': '0x7BD31E102B0CE9CCD39691782533656C',
+                    },
+                }],
+                {},
             ),
         ]
 
diff --git a/test/testdata/mpd/dash_sea.mpd b/test/testdata/mpd/dash_sea.mpd
new file mode 100644
index 0000000000..0eeb9798d6
--- /dev/null
+++ b/test/testdata/mpd/dash_sea.mpd
@@ -0,0 +1,109 @@
+<?xml version="1.0" encoding="utf-8"?>
+<MPD
+	xmlns="urn:mpeg:dash:schema:mpd:2011"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" profiles="urn:mpeg:dash:profile:isoff-live:2011" type="static"
+	xmlns:sea="urn:mpeg:dash:schema:sea:2012" mediaPresentationDuration="PT3M32.949S" minBufferTime="PT3S">
+	<Period>
+		<AdaptationSet id="1" group="5" profiles="ccff" bitstreamSwitching="false" segmentAlignment="true" contentType="audio" mimeType="audio/mp4" codecs="mp4a.40.2" lang="en">
+			<ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012">
+				<sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/>
+				<sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/>
+				<sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/>
+			</ContentProtection>
+			<Label>aac_eng_2_127999_2_1</Label>
+			<SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=$Time$,format=mpd-time-csf)" initialization="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=i,format=mpd-time-csf)">
+				<SegmentTimeline>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333" r="1"/>
+					<S d="20053334"/>
+					<S d="20053333"/>
+					<S d="3840000"/>
+				</SegmentTimeline>
+			</SegmentTemplate>
+			<Representation id="5_A_aac_eng_2_127999_2_1_1" bandwidth="127999" audioSamplingRate="48000"/>
+		</AdaptationSet>
+		<AdaptationSet id="2" group="1" profiles="ccff" bitstreamSwitching="false" segmentAlignment="true" contentType="video" mimeType="video/mp4" codecs="avc1.640028" maxWidth="1920" maxHeight="1080" startWithSAP="1">
+			<ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012">
+				<sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/>
+				<sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/>
+				<sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/>
+			</ContentProtection>
+			<SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(video=$Time$,format=mpd-time-csf)" initialization="QualityLevels($Bandwidth$)/Fragments(video=i,format=mpd-time-csf)">
+				<SegmentTimeline>
+					<S d="20000000" r="105"/>
+					<S d="8666666"/>
+				</SegmentTimeline>
+			</SegmentTemplate>
+			<Representation id="1_V_video_1" bandwidth="628102" width="1920" height="1080"/>
+			<Representation id="1_V_video_2" bandwidth="422519" codecs="avc1.64001F" width="1280" height="720"/>
+			<Representation id="1_V_video_3" bandwidth="258591" codecs="avc1.64001F" width="960" height="540"/>
+		</AdaptationSet>
+	</Period>
+</MPD>
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1069686a95..90b6641810 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -248,7 +248,9 @@ class InfoExtractor:
                     * hls_aes    A dictionary of HLS AES-128 decryption information
                                  used by the native HLS downloader to override the
                                  values in the media playlist when an '#EXT-X-KEY' tag
-                                 is present in the playlist:
+                                 is present in the playlist. Also used by the native DASH
+                                 downloader when DASH-SEA with AES-128-CBC content protection
+                                 is present in the manifest:
                                  * uri  The URI from which the key will be downloaded
                                  * key  The key (as hex) used to decrypt fragments.
                                         If `key` is given, any key URI will be ignored
@@ -261,7 +263,8 @@ class InfoExtractor:
                     * is_dash_periods  Whether the format is a result of merging
                                  multiple DASH periods.
                     * dash_cenc  A dictionary of DASH CENC decryption information
-                                 used by the native DASH downloader when set.
+                                 used by the native DASH downloader when MPEG CENC content protection
+                                 is present in the manifest.
                                  * laurl    The Clear Key license server URL from which
                                             CENC keys will be downloaded.
                                  * key_ids  List of key IDs (as hex) to request from the ClearKey
@@ -2680,10 +2683,11 @@ class InfoExtractor:
                 assert 'is_dash_periods' not in f, 'format already processed'
                 f['is_dash_periods'] = True
                 format_key = tuple(v for k, v in f.items() if k not in (
-                    ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc')))
-                if 'dash_cenc' in f:
-                    format_key = format_key + tuple(
-                        tuple(v) if isinstance(v, list) else v for v in f['dash_cenc'].values())
+                    ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc', 'hls_aes')))
+                for k in ('dash_cenc', 'hls_aes'):
+                    if k in f:
+                        format_key = format_key + tuple(
+                            tuple(v) if isinstance(v, list) else v for v in f[k].values())
                 if format_key not in formats:
                     formats[format_key] = f
                 elif 'fragments' in f:
@@ -2718,15 +2722,13 @@ class InfoExtractor:
             return self._xpath_ns(path, namespace)
 
         def extract_drm_info(element):
+            info = {}
             has_drm = False
-            cenc_info = {}
             for cp_e in element.findall(_add_ns('ContentProtection')):
                 has_drm = True
-                self._extract_mpd_content_protection_info(cp_e, cenc_info)
-            info = {'dash_cenc': cenc_info} if cenc_info else {}
-            if has_drm and not (
-                cenc_info.get('key') or cenc_info.get('laurl') and cenc_info.get('key_ids')
-            ):
+                self._extract_mpd_content_protection_info(cp_e, info)
+            cenc_info = info.get('dash_cenc', {})
+            if has_drm and not ('hls_aes' in info or cenc_info.get('key') or (cenc_info.get('laurl') and cenc_info.get('key_ids'))):
                 info['has_drm'] = True
             return info
 
@@ -3051,7 +3053,7 @@ class InfoExtractor:
                         period_entry['subtitles'][lang or 'und'].append(f)
             yield period_entry
 
-    def _extract_mpd_content_protection_info(self, cp_e, cenc_info):
+    def _extract_mpd_content_protection_info(self, cp_e, info):
         """
         Extract supported DASH-CENC parameters for an MPD ContentProtection element.
 
@@ -3061,13 +3063,16 @@ class InfoExtractor:
         from the manifest or when an extractor needs to process the optional data section in W3C
         PSSH boxes).
 
-        Note that the `has_drm` flag will be set for any format that does not meet one or more
-        of these conditions:
+        Note that after all ContentProtection elements have been handled, the `has_drm` flag
+        will be set for any format that does not meet at least one of these conditions:
 
-            * Both `laurl` and `key_ids` are set (indicating the native DASH downloader should
-               use the specified Clear Key server URL to retreive the CENC key for this format.
-            * `key_id` is set (indicating the native DASH downloader should use the specified
+            * `dash_cenc` is set and both `laurl` and `key_ids` are set (indicating the native
+               DASH downloader should use the specified Clear Key server URL to retrieve the
                CENC key for this format).
+            * `dash_cenc` is set and `key` is set (indicating the native DASH downloader should
+               use the specified CENC key for this format).
+            * `hls_aes` is set (indicating the native DASH downloader should use DASH SEA
+              AES-128-CBC decryption for this format).
 
         References:
          1. DASH-IF Content Protection Identifiers
@@ -3078,6 +3083,7 @@ class InfoExtractor:
             https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html
         """
         scheme_id = cp_e.get('schemeIdUri')
+        cenc_info = info.get('dash_cenc', {})
         if scheme_id == 'urn:mpeg:dash:mp4protection:2011':
             if cp_e.get('value') == 'cenc':
                 # ISO/IEC 23009-1 MPEG Common Encryption (CENC)
@@ -3113,6 +3119,19 @@ class InfoExtractor:
                     cenc_info['key_ids'] = kids
                 except (ValueError, TypeError, struct.error):
                     pass
+        elif scheme_id == 'urn:mpeg:dash:sea:2012':
+            # ISO/IEC 23009-4 DASH Segment Encryption and Authentication (AES-128-CBC)
+            sea_ns = 'urn:mpeg:dash:schema:sea:2012'
+            se_e = cp_e.find(self._xpath_ns('SegmentEncryption', sea_ns))
+            ks_e = cp_e.find(self._xpath_ns('KeySystem', sea_ns))
+            crypto_e = cp_e.find(self._xpath_ns('CryptoPeriod', sea_ns))
+            if (se_e is not None and se_e.get('schemeIdUri') == 'urn:mpeg:dash:sea:aes128-cbc:2013'
+                    and ks_e is not None and ks_e.get('keySystemUri') == 'urn:mpeg:dash:sea:keysys:http:2013'
+                    and crypto_e is not None and crypto_e.get('keyUriTemplate') and crypto_e.get('IV')
+                    ):
+                info['hls_aes'] = {'uri': crypto_e.get('keyUriTemplate'), 'iv': crypto_e.get('IV')}
+        if cenc_info:
+            info['dash_cenc'] = cenc_info
 
     def _extract_ism_formats(self, *args, **kwargs):
         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)