diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 06340fcd8d..933632dafc 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -948,6 +948,7 @@ from .lcp import ( from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, + LecturioDeIE, LecturioCourseIE, LecturioDeCourseIE, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index e9b7d8b9af..46bfaa3c81 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -14,29 +14,50 @@ from ..utils import ( ) -class LecturioBaseIE(InfoExtractor): +class LecturioIE(InfoExtractor): + _VALID_URL = r'https://app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))' + + _TESTS = [{ + 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', + 'info_dict': { + 'id': '39634', + 'ext': 'mp4', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', + }, + 'skip': 'Requires lecturio account credentials', + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, + }] + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' - _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' - _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' - _NETRC_MACHINE = 'lecturio' - is_DE = None + _NETRC_MACHINE = 'lecturio' - # Find out if url is german before starting anything else - def extract(self, url): - self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False - return super().extract(url) + _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', + 'German': 'de', + 'English': 'en', + 'Spanish': 'es', + 'Persian': 'fa', + 'French': 'fr', + 'Japanese': 'ja', + 'Polish': 'pl', + 'Pashto': 'ps', + 'Russian': 'ru', + } def _perform_login(self, username, password): - login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL # Sets some cookies _, urlh = self._download_webpage_handle( - login_url, None, 'Downloading login popup') + self._LOGIN_URL, None, 'Downloading login popup') def is_logged(url_handle): - return login_url not in url_handle.geturl() + return self._LOGIN_URL not in url_handle.geturl() # Already logged in if is_logged(urlh): @@ -49,7 +70,7 @@ class LecturioBaseIE(InfoExtractor): } response, urlh = self._download_webpage_handle( - login_url, None, 'Logging in', + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form)) # Logged in successfully @@ -63,55 +84,14 @@ class LecturioBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % errors, expected=True) raise ExtractorError('Unable to log in') - -class LecturioIE(LecturioBaseIE): - _VALID_URL = r'''(?x) - https:// - (?: - app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))| - (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag - ) - ''' - _TESTS = [{ - 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': '9a42cf1d8282a6311bf7211bbde26fde', - 'info_dict': { - 'id': '39634', - 'ext': 'mp4', - 'title': 'Important Concepts and Terms — Introduction to Microbiology', - }, - 'skip': 'Requires lecturio account credentials', - }, { - 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', - 'only_matching': True, - }, { - 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', - 'only_matching': True, - }] - - _CC_LANGS = { - 'Arabic': 'ar', - 'Bulgarian': 'bg', - 'German': 'de', - 'English': 'en', - 'Spanish': 'es', - 'Persian': 'fa', - 'French': 'fr', - 'Japanese': 'ja', - 'Polish': 'pl', - 'Pashto': 'ps', - 'Russian': 'ru', - } - def _real_extract(self, url): mobj = self._match_valid_url(url) - nt = mobj.group('nt') or mobj.group('nt_de') + nt = mobj.group('nt') lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' - video = self._download_json( - (self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) + video = self._download_json(self._API_BASE_URL + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') @@ -179,8 +159,20 @@ class LecturioIE(LecturioBaseIE): 'automatic_captions': automatic_captions, } +# German Lecturio simply requires different URLs +class LecturioDeIE(LecturioIE): + _VALID_URL = r'https://www\.lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag' + + _TESTS = [{ + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] + + _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' + _LOGIN_URL = 'https://www.lecturio.de/anmelden.html' -class LecturioCourseIE(LecturioBaseIE): + +class LecturioCourseIE(LecturioIE): _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P[^/?#&]+)\.course|(?:#/)?course/c/(?P\d+))' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', @@ -217,12 +209,12 @@ class LecturioCourseIE(LecturioBaseIE): clean_html(course.get('description'))) -class LecturioDeCourseIE(LecturioBaseIE): +class LecturioDeCourseIE(LecturioDeIE): _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, - } + }] def _real_extract(self, url): display_id = self._match_id(url)