mirror of https://github.com/yt-dlp/yt-dlp
feat(extractor): add SkoolIE extractor
- Implements extractor for skool.com videos - Handles authentication via auth_token cookie - Extracts video metadata including: - Title, description, duration - HLS formats and subtitles - Thumbnail and aspect ratio - Includes test case
parent
404bd889d0
commit
426a0e06c8
@ -0,0 +1,90 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import NO_DEFAULT, float_or_none, int_or_none, js_to_json, traverse_obj, unified_timestamp
|
||||
|
||||
|
||||
class SkoolIE(InfoExtractor):
    """Extractor for classroom videos on skool.com.

    The page embeds a Next.js ``__NEXT_DATA__`` JSON blob; the video entry
    under ``props.pageProps.video`` supplies the playback ID and tokens used
    to build the HLS manifest and thumbnail URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?skool\.com/(?P<group>[^/]+)/classroom/(?P<class>[^/?]+)(?:\?id=(?P<id>[^&]+))?'
    _REQUIRED_COOKIE = 'auth_token'
    _TESTS = [{
        'url': 'https://www.skool.com/skoolers/classroom/c4b8d595?md=86ca3282abf4421687df974a7cac98db',
        'info_dict': {
            'id': '2694137c46c7456d823ded9ac8ea716f',
            'ext': 'mp4',
            'title': '1. Context - $100M Money Models · Skoolers',
            'description': 'Private club for skool owners. Let\'s build communities together.',
            # The page reports 706666, which is treated as milliseconds
            # and converted to whole seconds by the extractor
            'duration': 706,
            'aspect_ratio': 1.78,
            'thumbnail': r're:^https://thumb\.video\.skool\.com/[^/]+/00000000\.jpg\?token=.*',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _search_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
        """Find and parse the ``__NEXT_DATA__`` JSON embedded in ``webpage``.

        @param webpage      HTML of the page to search
        @param video_id     identifier forwarded to ``_search_json``
        @param fatal        forwarded to ``_search_json``; forced to False
                            whenever a ``default`` is supplied
        @param default      value to return when nothing is found; the legacy
                            string ``'{}'`` is accepted with a deprecation
                            warning and converted to an empty dict
        @param kw           extra keyword arguments forwarded to ``_search_json``
        @returns            whatever ``_search_json`` returns
        """
        if default == '{}':
            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
            default = {}
        if default is not NO_DEFAULT:
            # An explicit default means the caller wants a soft failure
            fatal = False

        return self._search_json(
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)

    def _real_extract(self, url):
        """Extract the info dict for a skool.com classroom video URL.

        Requires the ``auth_token`` cookie; if it is absent,
        ``raise_login_required`` is invoked with ``metadata_available=True``.
        Formats are only extracted when the page data provides both a
        playback ID and a playback token.
        """
        # Check if the required cookie is present
        if self._REQUIRED_COOKIE not in self._get_cookies(url):
            self.raise_login_required(
                msg=f'This video requires the "{self._REQUIRED_COOKIE}" cookie to be set.',
                method='cookies',
                metadata_available=True,  # Allows extraction of metadata even without the cookie
            )

        mobj = self._match_valid_url(url)
        # The `id` group is optional in _VALID_URL, so fall back to the
        # classroom slug; use the same identifier for every request/log line
        display_id = mobj.group('id') or mobj.group('class')
        webpage = self._download_webpage(url, display_id)

        next_data = self._search_data(webpage, display_id, transform_source=js_to_json)

        # `expected_type` must be passed by keyword (a positional `dict` would
        # be treated as another path); `or {}` keeps the lookups below safe
        # when the page carries no video entry
        video_data = traverse_obj(
            next_data, ('props', 'pageProps', 'video'), expected_type=dict) or {}

        video_id = video_data.get('id') or display_id
        playback_id = video_data.get('playbackId')
        playback_token = video_data.get('playbackToken')
        thumbnail_token = video_data.get('thumbnailToken')

        formats, subtitles = [], {}
        if playback_id and playback_token:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                f'https://stream.video.skool.com/{playback_id}.m3u8?token={playback_token}',
                video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False,
                headers={'Referer': url})

        # Only build the thumbnail URL when both parts are known, instead of
        # emitting a bogus ".../None/00000000.jpg?token=None" URL
        thumbnail = None
        if playback_id and thumbnail_token:
            thumbnail = f'https://thumb.video.skool.com/{playback_id}/00000000.jpg?token={thumbnail_token}'

        return {
            'id': video_id,
            'title': traverse_obj(next_data, ('props', 'pageProps', 'settings', 'pageTitle')),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': {
                'Referer': url,
            },
            # NOTE(review): the site value appears to be in milliseconds (the
            # test fixture reported 706666); the info dict expects seconds
            'duration': int_or_none(video_data.get('duration'), scale=1000),
            'aspect_ratio': float_or_none(video_data.get('aspectRatio')),
            'thumbnail': thumbnail,
            # NOTE(review): passed through unchanged from the original
            # implementation; confirm what consumes the `expire` key
            'expire': unified_timestamp(video_data.get('expire')),
        }
|
Loading…
Reference in New Issue