feat(extractor): add SkoolIE extractor

- Implements extractor for skool.com videos
- Handles authentication via auth_token cookie
- Extracts video metadata including:
  - Title, description, duration
  - HLS formats and subtitles
  - Thumbnail and aspect ratio
- Includes test case
Derrick Hammer 19 hours ago
parent 404bd889d0
commit 426a0e06c8

@ -1877,6 +1877,7 @@ from .simplecast import (
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skeb import SkebIE
from .skool import SkoolIE
from .sky import (
    SkyNewsIE,
    SkyNewsStoryIE,

@ -0,0 +1,90 @@
from .common import InfoExtractor
from ..utils import NO_DEFAULT, float_or_none, int_or_none, js_to_json, traverse_obj, unified_timestamp
class SkoolIE(InfoExtractor):
    """Extractor for classroom videos on skool.com.

    The page embeds a Next.js ``__NEXT_DATA__`` JSON blob; the video record is
    read from ``props.pageProps.video`` and the HLS manifest URL is built from
    its ``playbackId``/``playbackToken`` pair.
    """

    _VALID_URL = r'https?://(?:www\.)?skool\.com/(?P<group>[^/]+)/classroom/(?P<class>[^/?]+)(?:\?id=(?P<id>[^&]+))?'
    _REQUIRED_COOKIE = 'auth_token'
    _TESTS = [{
        'url': 'https://www.skool.com/skoolers/classroom/c4b8d595?md=86ca3282abf4421687df974a7cac98db',
        'info_dict': {
            'id': '2694137c46c7456d823ded9ac8ea716f',
            'ext': 'mp4',
            'title': '1. Context - $100M Money Models · Skoolers',
            'description': 'Private club for skool owners. Let\'s build communities together.',
            'duration': 706,
            'aspect_ratio': 1.78,
            'thumbnail': r're:^https://thumb\.video\.skool\.com/[^/]+/00000000\.jpg\?token=.*',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _search_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
        """Locate and parse the ``__NEXT_DATA__`` JSON embedded in *webpage*.

        Supplying any ``default`` makes the lookup non-fatal. The legacy string
        default ``'{}'`` is still accepted, but triggers a deprecation warning
        and is replaced by an empty dict. Extra keyword arguments are forwarded
        to ``_search_json``.
        """
        if default == '{}':
            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
            default = {}
        if default is not NO_DEFAULT:
            fatal = False

        return self._search_json(
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)

    def _real_extract(self, url):
        """Return the info dict for the classroom video at *url*."""
        # The site gates playback behind a session cookie; report it up front.
        if self._REQUIRED_COOKIE not in self._get_cookies(url):
            self.raise_login_required(
                msg=f'This video requires the "{self._REQUIRED_COOKIE}" cookie to be set.',
                method='cookies',
                # Presumably lets metadata-only runs continue without the cookie.
                metadata_available=True,
            )

        mobj = self._match_valid_url(url)
        classroom, url_video_id = mobj.group('class', 'id')
        # The ``id`` query parameter is optional, so always keep a non-empty
        # identifier around for log messages and as a last-resort video id.
        display_id = url_video_id or classroom

        webpage = self._download_webpage(url, display_id)
        next_data = self._search_data(webpage, display_id, transform_source=js_to_json)

        # ``expected_type`` must be passed by keyword: a bare positional ``dict``
        # would be interpreted by traverse_obj as an additional path.
        video_data = traverse_obj(
            next_data, ('props', 'pageProps', 'video'), expected_type=dict) or {}
        video_id = video_data.get('id') or display_id

        title = (
            traverse_obj(next_data, ('props', 'pageProps', 'settings', 'pageTitle'))
            or self._og_search_title(webpage, default=None))
        description = self._og_search_description(webpage, default=None)

        playback_id = video_data.get('playbackId')
        playback_token = video_data.get('playbackToken')
        thumbnail_token = video_data.get('thumbnailToken')

        formats, subtitles = [], {}
        if playback_id and playback_token:
            m3u8_url = f'https://stream.video.skool.com/{playback_id}.m3u8?token={playback_token}'
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False, headers={
                    'Referer': url,
                })
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        # Only advertise a thumbnail when both URL components are known, so a
        # literal ".../None/...?token=None" URL is never emitted.
        thumbnail = None
        if playback_id and thumbnail_token:
            thumbnail = f'https://thumb.video.skool.com/{playback_id}/00000000.jpg?token={thumbnail_token}'

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': {
                'Referer': url,
            },
            # NOTE(review): the site value looks like milliseconds (706666 for
            # the test video), so it is scaled to whole seconds - confirm.
            'duration': int_or_none(video_data.get('duration'), scale=1000),
            'aspect_ratio': float_or_none(video_data.get('aspectRatio')),
            'thumbnail': thumbnail,
            'expire': unified_timestamp(video_data.get('expire')),
        }
Loading…
Cancel
Save