From 990566c324d271ee27b3b6fc3280e7a4b2424668 Mon Sep 17 00:00:00 2001 From: McSwindler Date: Tue, 23 Apr 2024 22:48:18 -0500 Subject: [PATCH] [watchertv] create DropoutBase IEs for Dropout and WatcherTV to extend --- yt_dlp/extractor/dropout.py | 179 ++++++++++++++++++---------------- yt_dlp/extractor/watchertv.py | 7 +- 2 files changed, 96 insertions(+), 90 deletions(-) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index d5eb93139f..6af8148d36 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -17,7 +17,84 @@ from ..utils import ( ) -class DropoutIE(InfoExtractor): +class DropoutBaseIE(InfoExtractor): + _HOST = None + + def _get_authenticity_token(self, display_id): + signin_page = self._download_webpage( + f'{self._HOST}/login', display_id, note='Getting authenticity token') + return self._html_search_regex( + r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', + signin_page, 'authenticity_token') + + def _login(self, display_id): + username, password = self._get_login_info() + if not username: + return True + + response = self._download_webpage( + f'{self._HOST}/login', display_id, note='Logging in', fatal=False, + data=urlencode_postdata({ + 'email': username, + 'password': password, + 'authenticity_token': self._get_authenticity_token(display_id), + 'utf8': True, + })) + + user_has_subscription = self._search_regex( + r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') + if user_has_subscription.lower() == 'true': + return + elif user_has_subscription.lower() == 'false': + return 'Account is not subscribed' + else: + return 'Incorrect username/password' + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = None + if self._get_cookies(self._HOST).get('_session'): + webpage = self._download_webpage(url, display_id) + if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' _VIDEO_IE = DropoutIE _TESTS = [ @@ -207,19 +230,3 @@ class DropoutSeasonIE(InfoExtractor): }, }, ] - - def _fetch_page(self, url, season_id, page): - page += 1 - webpage = self._download_webpage( - f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, self._VIDEO_IE) for item_url in traverse_obj( - get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] - - def _real_extract(self, url): - season_id = self._match_id(url) - season_num = self._match_valid_url(url).group('season') or 1 - season_title = season_id.replace('-', ' ').title() - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), - f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py index 9079e8e486..a031007669 100644 --- a/yt_dlp/extractor/watchertv.py +++ b/yt_dlp/extractor/watchertv.py @@ -1,7 +1,7 @@ -from .dropout import DropoutIE, DropoutSeasonIE +from .dropout import DropoutBaseIE, DropoutSeasonBaseIE -class WatcherTVIE(DropoutIE): +class WatcherTVIE(DropoutBaseIE): _HOST = 'https://www.watchertv.com' _NETRC_MACHINE = 'watchertv' @@ -76,8 +76,7 @@ class WatcherTVIE(DropoutIE): ] -class WatcherTVSeasonIE(DropoutSeasonIE): - _PAGE_SIZE = 24 +class WatcherTVSeasonIE(DropoutSeasonBaseIE): _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?P[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' _VIDEO_IE = WatcherTVIE _TESTS = [