From 426a0e06c8abf5ec9a0999ea3d5b9deecfdd7065 Mon Sep 17 00:00:00 2001
From: Derrick Hammer
Date: Mon, 18 Aug 2025 02:28:22 -0400
Subject: [PATCH] feat(extractor): add SkoolIE extractor

- Implements extractor for skool.com videos
- Handles authentication via auth_token cookie
- Extracts video metadata including:
  - Title, description, duration
  - HLS formats and subtitles
  - Thumbnail and aspect ratio
- Includes test case
---
 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/skool.py       | 103 +++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 yt_dlp/extractor/skool.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index bb595f924b..f325d1fa4b 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1877,6 +1877,7 @@ from .simplecast import (
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skeb import SkebIE
+from .skool import SkoolIE
 from .sky import (
     SkyNewsIE,
     SkyNewsStoryIE,
diff --git a/yt_dlp/extractor/skool.py b/yt_dlp/extractor/skool.py
new file mode 100644
index 0000000000..9f30b0fb03
--- /dev/null
+++ b/yt_dlp/extractor/skool.py
@@ -0,0 +1,103 @@
+from .common import InfoExtractor
+from ..utils import (
+    NO_DEFAULT,
+    float_or_none,
+    int_or_none,
+    js_to_json,
+    traverse_obj,
+    unified_timestamp,
+)
+
+
+class SkoolIE(InfoExtractor):
+    # The first path segment is captured for completeness only; extraction
+    # relies on the `class` and (optional) `id` groups
+    _VALID_URL = r'https?://(?:www\.)?skool\.com/(?P<group>[^/]+)/classroom/(?P<class>[^/?]+)(?:\?id=(?P<id>[^&]+))?'
+    _REQUIRED_COOKIE = 'auth_token'
+    _TESTS = [{
+        'url': 'https://www.skool.com/skoolers/classroom/c4b8d595?md=86ca3282abf4421687df974a7cac98db',
+        'info_dict': {
+            'id': '2694137c46c7456d823ded9ac8ea716f',
+            'ext': 'mp4',
+            'title': '1. Context - $100M Money Models · Skoolers',
+            # The sentence separator may be a line break rather than a plain space, so match any whitespace
+            'description': r're:^Private club for skool owners\.\s+Let\'s build communities together\.$',
+            'duration': 706666,
+            'aspect_ratio': 1.78,
+            'thumbnail': r're:^https://thumb\.video\.skool\.com/[^/]+/00000000\.jpg\?token=.*',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _search_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        """Return the JSON object embedded in the page's `__NEXT_DATA__` script tag.
+
+        Passing any `default` other than NO_DEFAULT makes the search non-fatal.
+        The legacy string default '{}' is still accepted but emits a deprecation
+        warning. Extra keyword arguments are forwarded to `_search_json`.
+        """
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
+
+    def _real_extract(self, url):
+        """Build the info dict for the video embedded in a classroom page."""
+        if self._REQUIRED_COOKIE not in self._get_cookies(url):
+            self.raise_login_required(
+                msg=f'This video requires the "{self._REQUIRED_COOKIE}" cookie to be set.',
+                method='cookies',
+                metadata_available=True,  # Allows extraction of metadata even without the cookie
+            )
+
+        mobj = self._match_valid_url(url)
+        # The `id` group is optional, so fall back to the classroom slug
+        display_id = mobj.group('id') or mobj.group('class')
+        webpage = self._download_webpage(url, display_id)
+
+        next_data = self._search_data(webpage, display_id, transform_source=js_to_json)
+        page_props = traverse_obj(next_data, ('props', 'pageProps'), expected_type=dict) or {}
+        # `expected_type=dict` (a positional `dict` would be read as a second path)
+        # guarantees a mapping even when the page carries no video object
+        video_data = traverse_obj(page_props, 'video', expected_type=dict) or {}
+
+        video_id = video_data.get('id') or display_id
+        playback_id = video_data.get('playbackId')
+        playback_token = video_data.get('playbackToken')
+        thumbnail_token = video_data.get('thumbnailToken')
+
+        formats, subtitles = [], {}
+        if playback_id and playback_token:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                f'https://stream.video.skool.com/{playback_id}.m3u8?token={playback_token}',
+                video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False,
+                headers={'Referer': url})
+
+        thumbnail = None
+        if playback_id and thumbnail_token:
+            # Only build the URL when both parts exist, never one containing "None"
+            thumbnail = f'https://thumb.video.skool.com/{playback_id}/00000000.jpg?token={thumbnail_token}'
+
+        return {
+            'id': video_id,
+            'title': (traverse_obj(page_props, ('settings', 'pageTitle'), expected_type=str)
+                      or self._og_search_title(webpage, default=None)),
+            'description': self._og_search_description(webpage, default=None),
+            'formats': formats,
+            'subtitles': subtitles,
+            'http_headers': {
+                'Referer': url,
+            },
+            # NOTE(review): the test expects 706666, which looks like milliseconds - confirm the unit
+            'duration': int_or_none(video_data.get('duration')),
+            'aspect_ratio': float_or_none(video_data.get('aspectRatio')),
+            'thumbnail': thumbnail,
+            'expire': unified_timestamp(video_data.get('expire')),
+        }