feat(extractor): add SkoolIE extractor

- Implements extractor for skool.com videos
- Handles authentication via auth_token cookie
- Extracts video metadata including:
  - Title, description, duration
  - HLS formats and subtitles
  - Thumbnail and aspect ratio
- Includes test case
17 hours ago · 426a0e06c8
parent 404bd889d0
commit 426a0e06c8
2 changed files with 91 additions and 0 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1877,6 +1877,7 @@ from .simplecast import (
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skeb import SkebIE
+from .skool import SkoolIE
 from .sky import (
    SkyNewsIE,
    SkyNewsStoryIE,
--- a/yt_dlp/extractor/skool.py
+++ b/yt_dlp/extractor/skool.py
@@ -0,0 +1,90 @@
+
+from .common import InfoExtractor
+from ..utils import NO_DEFAULT, float_or_none, int_or_none, js_to_json, traverse_obj, unified_timestamp
+
+
+class SkoolIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?skool\.com/(?P<group>[^/]+)/classroom/(?P<class>[^/?]+)(?:\?id=(?P<id>[^&]+))?'
+    _REQUIRED_COOKIE = 'auth_token'
+    _TESTS = [{
+        'url': 'https://www.skool.com/skoolers/classroom/c4b8d595?md=86ca3282abf4421687df974a7cac98db',
+        'info_dict': {
+            'id': '2694137c46c7456d823ded9ac8ea716f',
+            'ext': 'mp4',
+            'title': '1. Context - $100M Money Models · Skoolers',
+            'description': 'Private club for skool owners. Let\'s build communities together.',
+            'duration': 706666,
+            'aspect_ratio': 1.78,
+            'thumbnail': r're:^https://thumb\.video\.skool\.com/[^/]+/00000000\.jpg\?token=.*',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _search_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
+
+    def _real_extract(self, url):
+        # Check if the required cookie is present
+        cookies = self._get_cookies(url)
+        if self._REQUIRED_COOKIE not in cookies:
+            self.raise_login_required(
+                msg=f'This video requires the "{self._REQUIRED_COOKIE}" cookie to be set.',
+                method='cookies',
+                metadata_available=True,  # Allows extraction of metadata even without the cookie
+            )
+
+        mobj = self._match_valid_url(url)
+        classroom = mobj.group('class')
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id or classroom)
+
+        next_data = self._search_data(
+            webpage, video_id,
+            transform_source=js_to_json,
+        )
+
+        video_data = traverse_obj(next_data, ('props', 'pageProps', 'video'), dict)
+
+        video_id = video_data.get('id')
+        title = traverse_obj(next_data, ('props', 'pageProps', 'settings', 'pageTitle'))
+        description = self._og_search_description(webpage, default=None)
+        playback_id = video_data.get('playbackId')
+        playback_token = video_data.get('playbackToken')
+
+        formats = []
+        subtitles = {}
+
+        if playback_id and playback_token:
+            m3u8_url = f'https://stream.video.skool.com/{playback_id}.m3u8?token={playback_token}'
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False, headers={
+                    'Referer': url,
+                })
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'subtitles': subtitles,
+            'http_headers': {
+                'Referer': url,
+            },
+            'duration': int_or_none(video_data.get('duration')),
+            'aspect_ratio': float_or_none(video_data.get('aspectRatio')),
+            'thumbnail': f'https://thumb.video.skool.com/{video_data.get("playbackId")}/00000000.jpg?token={video_data.get("thumbnailToken")}',
+            'expire': unified_timestamp(video_data.get('expire')),
+        }