From fdfac32149d5a4ab0365a02159057b94f15044fd Mon Sep 17 00:00:00 2001 From: Randalix Date: Tue, 12 Aug 2025 23:55:21 +0200 Subject: [PATCH 01/10] feat: Update SouthParkDeIE to use new API extraction logic --- yt_dlp/extractor/southpark.py | 50 ++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 3d661a86ac..61251a010a 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,4 +1,8 @@ from .mtv import MTVServicesInfoExtractor +from ..utils import ( + traverse_obj, + random_uuidv4, +) class SouthParkIE(MTVServicesInfoExtractor): @@ -99,14 +103,46 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE }, }] - def _get_feed_url(self, uri, url=None): - video_id = self._id_from_uri(uri) - config = self._download_json( - f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id) - return self._remove_template_parameter(config['feedWithQueryParams']) + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - def _get_feed_query(self, uri): - return + data = self._parse_json(self._search_regex( + r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) + + # Find the videoDetail object by first finding the MainContainer component + video_detail = traverse_obj(data, ( + 'children', lambda _, v: v.get('type') == 'MainContainer', + 'children', 0, 'children', 0, 'props', 'videoDetail' + ), get_all=False) + + # Fallback for a simpler data structure found on some pages + if not video_detail: + video_detail = traverse_obj(data, ('children', 0, 'videoDetail'), get_all=False) + + api_url = video_detail['videoServiceUrl'] + + # Call the Topaz API to get the final stream URL + api_data = self._download_json( + api_url, display_id, 'Fetching video metadata', query={ + 'ssus': random_uuidv4(), + 'clientPlatform': 'mobile', + }) + + hls_url = traverse_obj(api_data, ('stitchedstream', 'source')) + + return { + 'id': video_detail['id'], + 'display_id': display_id, + 'url': hls_url, + 'title': video_detail.get('title'), + 'description': video_detail.get('description'), + 'duration': traverse_obj(video_detail, ('duration', 'milliseconds'), expected_type=int) / 1000, + 'season_number': video_detail.get('seasonNumber'), + 'episode_number': traverse_obj(video_detail, 'episodeAiringOrder'), + 'timestamp': traverse_obj(video_detail, ('publishDate', 'timestamp')), + 'series': traverse_obj(video_detail, ('parentEntity', 'title')), + } class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE From 52876aa5a79ab698b93037a987ca945f14959ac2 Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:00:48 +0200 Subject: [PATCH 02/10] fix: Update SouthParkDeIE to use modern API extraction --- yt_dlp/extractor/southpark.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 61251a010a..83700a3675 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,7 +1,7 @@ from .mtv import MTVServicesInfoExtractor from ..utils import ( - traverse_obj, random_uuidv4, + traverse_obj, ) @@ -110,26 +110,20 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - # Find the videoDetail object by first finding the MainContainer component video_detail = traverse_obj(data, ( 'children', lambda _, v: v.get('type') == 'MainContainer', 'children', 0, 'children', 0, 'props', 'videoDetail' - ), get_all=False) - - # Fallback for a simpler data structure found on some pages - if not video_detail: - video_detail = traverse_obj(data, ('children', 0, 'videoDetail'), get_all=False) + ), default=traverse_obj(data, ('children', 0, 'videoDetail'))) api_url = video_detail['videoServiceUrl'] - # Call the Topaz API to get the final stream URL api_data = self._download_json( api_url, display_id, 'Fetching video metadata', query={ 'ssus': random_uuidv4(), 'clientPlatform': 'mobile', }) - hls_url = traverse_obj(api_data, ('stitchedstream', 'source')) + hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str) return { 'id': video_detail['id'], From 3228b3de4913a2862830ba945e0e02a9d8ae11bb Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:05:38 +0200 Subject: [PATCH 03/10] fix: Improve SouthParkDeIE to handle various page layouts --- yt_dlp/extractor/southpark.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 83700a3675..ca2a3bae74 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -110,10 +110,17 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - video_detail = traverse_obj(data, ( - 'children', lambda _, v: v.get('type') == 'MainContainer', - 'children', 0, 'children', 0, 'props', 'videoDetail' - ), default=traverse_obj(data, ('children', 0, 'videoDetail'))) + # Try multiple paths to find the video data, handling both regular and special episodes + video_detail = traverse_obj(data, [ + # Path for regular episodes (more complex) + ('children', lambda _, v: v.get('type') == 'MainContainer', + 'children', 0, 'children', 0, 'props', 'videoDetail'), + # Fallback path for special episodes (simpler) + ('children', 0, 'videoDetail'), + ]) + + if not video_detail: + raise ExtractorError('Could not find video data in page') api_url = video_detail['videoServiceUrl'] From 1d5852663d674ef39009751c0cfc62da259ccf9b Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:05:53 +0200 Subject: [PATCH 04/10] fix: Import ExtractorError in SouthParkDeIE --- yt_dlp/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index ca2a3bae74..cfcfa3e682 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -2,6 +2,7 @@ from .mtv import MTVServicesInfoExtractor from ..utils import ( random_uuidv4, traverse_obj, + ExtractorError, # Added ExtractorError import ) From 35c83c26dc305cc2c8efae12209458ac12cd6236 Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:08:12 +0200 Subject: [PATCH 05/10] fix: Correct traverse_obj call in SouthParkDeIE --- yt_dlp/extractor/southpark.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index cfcfa3e682..db9f5d8f6f 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -111,14 +111,13 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - # Try multiple paths to find the video data, handling both regular and special episodes - video_detail = traverse_obj(data, [ - # Path for regular episodes (more complex) + # CORRECTED: Provide paths as separate arguments, not a list + video_detail = traverse_obj(data, + # Path for regular episodes ('children', lambda _, v: v.get('type') == 'MainContainer', 'children', 0, 'children', 0, 'props', 'videoDetail'), - # Fallback path for special episodes (simpler) - ('children', 0, 'videoDetail'), - ]) + # Fallback path for special episodes + ('children', 0, 'videoDetail')) if not video_detail: raise ExtractorError('Could not find video data in page') From 8207f3b82451a67a53fc742247282fe8ae60a00a Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:10:17 +0200 Subject: [PATCH 06/10] fix: Correct SouthParkDeIE _real_extract to use traverse_obj correctly --- yt_dlp/extractor/southpark.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index db9f5d8f6f..9b79040a22 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -111,13 +111,14 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - # CORRECTED: Provide paths as separate arguments, not a list + # Try multiple paths and, crucially, get only the FIRST match, not a list video_detail = traverse_obj(data, # Path for regular episodes ('children', lambda _, v: v.get('type') == 'MainContainer', 'children', 0, 'children', 0, 'props', 'videoDetail'), # Fallback path for special episodes - ('children', 0, 'videoDetail')) + ('children', 0, 'videoDetail'), + get_all=False) if not video_detail: raise ExtractorError('Could not find video data in page') @@ -130,7 +131,7 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE 'clientPlatform': 'mobile', }) - hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str) + hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str, get_all=False) return { 'id': video_detail['id'], From d3de29389dab6c40bcd48e2bdd32b9c71d3f0a3b Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:14:45 +0200 Subject: [PATCH 07/10] fix: Correct SouthParkDeIE _real_extract method --- yt_dlp/extractor/southpark.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 9b79040a22..5d70c9d830 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -111,14 +111,10 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - # Try multiple paths and, crucially, get only the FIRST match, not a list - video_detail = traverse_obj(data, - # Path for regular episodes - ('children', lambda _, v: v.get('type') == 'MainContainer', - 'children', 0, 'children', 0, 'props', 'videoDetail'), - # Fallback path for special episodes - ('children', 0, 'videoDetail'), - get_all=False) + video_detail = traverse_obj(data, ( + 'children', lambda _, v: v.get('type') == 'MainContainer', + 'children', 0, 'children', 0, 'props', 'videoDetail' + ), ('children', 0, 'videoDetail'), get_all=False) if not video_detail: raise ExtractorError('Could not find video data in page') @@ -131,12 +127,11 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE 'clientPlatform': 'mobile', }) - hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str, get_all=False) + hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str) - return { + info = { 'id': video_detail['id'], 'display_id': display_id, - 'url': hls_url, 'title': video_detail.get('title'), 'description': video_detail.get('description'), 'duration': traverse_obj(video_detail, ('duration', 'milliseconds'), expected_type=int) / 1000, @@ -145,6 +140,9 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE 'timestamp': traverse_obj(video_detail, ('publishDate', 'timestamp')), 'series': traverse_obj(video_detail, ('parentEntity', 'title')), } + info['formats'] = self._extract_m3u8_formats( + hls_url, display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + return info class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE From 25cb4168763b17d19025696bcdf3e7fb9ae6ab7e Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 07:44:05 +0200 Subject: [PATCH 08/10] style: Fix import formatting and add trailing comma --- yt_dlp/extractor/southpark.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 5d70c9d830..3c1d45fc6a 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,9 +1,5 @@ from .mtv import MTVServicesInfoExtractor -from ..utils import ( - random_uuidv4, - traverse_obj, - ExtractorError, # Added ExtractorError import -) +from ..utils import random_uuidv4, traverse_obj, ExtractorError class SouthParkIE(MTVServicesInfoExtractor): @@ -114,7 +110,7 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE video_detail = traverse_obj(data, ( 'children', lambda _, v: v.get('type') == 'MainContainer', 'children', 0, 'children', 0, 'props', 'videoDetail' - ), ('children', 0, 'videoDetail'), get_all=False) + ), ('children', 0, 'videoDetail'), get_all=False,) if not video_detail: raise ExtractorError('Could not find video data in page') From 7dd2ee0072ecc1fa978861e79b91086c85c1da67 Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 14:08:15 +0200 Subject: [PATCH 09/10] style: Fix ruff linting errors in southpark extractor --- yt_dlp/extractor/southpark.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 3c1d45fc6a..d4db2ce72e 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,5 +1,5 @@ -from .mtv import MTVServicesInfoExtractor from ..utils import random_uuidv4, traverse_obj, ExtractorError +from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): @@ -107,10 +107,15 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - video_detail = traverse_obj(data, ( - 'children', lambda _, v: v.get('type') == 'MainContainer', - 'children', 0, 'children', 0, 'props', 'videoDetail' - ), ('children', 0, 'videoDetail'), get_all=False,) + video_detail = traverse_obj( + data, + # Path for regular episodes + ('children', lambda _, v: v.get('type') == 'MainContainer', + 'children', 0, 'children', 0, 'props', 'videoDetail'), + # Fallback path for special episodes + ('children', 0, 'videoDetail'), + get_all=False, + ) if not video_detail: raise ExtractorError('Could not find video data in page') From fa6fe651a164447d2d5aed62cbeb91243cf365e5 Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 14:13:54 +0200 Subject: [PATCH 10/10] fix: imports --- yt_dlp/extractor/southpark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index d4db2ce72e..d36adfd785 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,5 +1,5 @@ -from ..utils import random_uuidv4, traverse_obj, ExtractorError from .mtv import MTVServicesInfoExtractor +from ..utils import ExtractorError, random_uuidv4, traverse_obj class SouthParkIE(MTVServicesInfoExtractor):