From ff740b365e83f0967c1dff807dde434baf1967a2 Mon Sep 17 00:00:00 2001 From: Fridolin Kutterer Date: Thu, 29 Jun 2023 11:16:35 +0200 Subject: [PATCH 1/7] Made Lecturio poll the german API if a german URL is given Formatting fixes --- yt_dlp/extractor/lecturio.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 973764c63f..e9b7d8b9af 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -16,16 +16,27 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' + _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' + _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' _NETRC_MACHINE = 'lecturio' + is_DE = None + + # Find out if url is german before starting anything else + def extract(self, url): + self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False + return super().extract(url) + def _perform_login(self, username, password): + + login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL # Sets some cookies _, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Downloading login popup') + login_url, None, 'Downloading login popup') def is_logged(url_handle): - return self._LOGIN_URL not in url_handle.geturl() + return login_url not in url_handle.geturl() # Already logged in if is_logged(urlh): @@ -38,7 +49,7 @@ class LecturioBaseIE(InfoExtractor): } response, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Logging in', + login_url, None, 'Logging in', data=urlencode_postdata(login_form)) # Logged in successfully @@ -98,8 +109,9 @@ class LecturioIE(LecturioBaseIE): lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' + video = self._download_json( - self._API_BASE_URL + api_path, display_id) + (self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') From 6486009b1e0174314056060cd1468dd91c2a9841 Mon Sep 17 00:00:00 2001 From: Fridolin Kutterer Date: Sat, 11 Nov 2023 19:42:58 +0100 Subject: [PATCH 2/7] Cleaned up Class structure, added extractors to index --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/lecturio.py | 112 +++++++++++++++----------------- 2 files changed, 53 insertions(+), 60 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 06340fcd8d..933632dafc 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -948,6 +948,7 @@ from .lcp import ( from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, + LecturioDeIE, LecturioCourseIE, LecturioDeCourseIE, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index e9b7d8b9af..46bfaa3c81 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -14,29 +14,50 @@ from ..utils import ( ) -class LecturioBaseIE(InfoExtractor): +class LecturioIE(InfoExtractor): + _VALID_URL = r'https://app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))' + + _TESTS = [{ + 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', + 'info_dict': { + 'id': '39634', + 'ext': 'mp4', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', + }, + 'skip': 'Requires lecturio account credentials', + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, + }] + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' - _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' - _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' - _NETRC_MACHINE = 'lecturio' - is_DE = None + _NETRC_MACHINE = 'lecturio' - # Find out if url is german before starting anything else - def extract(self, url): - self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False - return super().extract(url) + _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', + 'German': 'de', + 'English': 'en', + 'Spanish': 'es', + 'Persian': 'fa', + 'French': 'fr', + 'Japanese': 'ja', + 'Polish': 'pl', + 'Pashto': 'ps', + 'Russian': 'ru', + } def _perform_login(self, username, password): - login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL # Sets some cookies _, urlh = self._download_webpage_handle( - login_url, None, 'Downloading login popup') + self._LOGIN_URL, None, 'Downloading login popup') def is_logged(url_handle): - return login_url not in url_handle.geturl() + return self._LOGIN_URL not in url_handle.geturl() # Already logged in if is_logged(urlh): @@ -49,7 +70,7 @@ class LecturioBaseIE(InfoExtractor): } response, urlh = self._download_webpage_handle( - login_url, None, 'Logging in', + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form)) # Logged in successfully @@ -63,55 +84,14 @@ class LecturioBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % errors, expected=True) raise ExtractorError('Unable to log in') - -class LecturioIE(LecturioBaseIE): - _VALID_URL = r'''(?x) - https:// - (?: - app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))| - (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag - ) - ''' - _TESTS = [{ - 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': '9a42cf1d8282a6311bf7211bbde26fde', - 'info_dict': { - 'id': '39634', - 'ext': 'mp4', - 'title': 'Important Concepts and Terms — Introduction to Microbiology', - }, - 'skip': 'Requires lecturio account credentials', - }, { - 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', - 'only_matching': True, - }, { - 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', - 'only_matching': True, - }] - - _CC_LANGS = { - 'Arabic': 'ar', - 'Bulgarian': 'bg', - 'German': 'de', - 'English': 'en', - 'Spanish': 'es', - 'Persian': 'fa', - 'French': 'fr', - 'Japanese': 'ja', - 'Polish': 'pl', - 'Pashto': 'ps', - 'Russian': 'ru', - } - def _real_extract(self, url): mobj = self._match_valid_url(url) - nt = mobj.group('nt') or mobj.group('nt_de') + nt = mobj.group('nt') lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' - video = self._download_json( - (self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) + video = self._download_json(self._API_BASE_URL + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') @@ -179,8 +159,20 @@ class LecturioIE(LecturioBaseIE): 'automatic_captions': automatic_captions, } +# German Lecturio simply requires different URLs +class LecturioDeIE(LecturioIE): + _VALID_URL = r'https://www\.lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag' + + _TESTS = [{ + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] + + _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' + _LOGIN_URL = 'https://www.lecturio.de/anmelden.html' -class LecturioCourseIE(LecturioBaseIE): + +class LecturioCourseIE(LecturioIE): _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P[^/?#&]+)\.course|(?:#/)?course/c/(?P\d+))' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', @@ -217,12 +209,12 @@ class LecturioCourseIE(LecturioBaseIE): clean_html(course.get('description'))) -class LecturioDeCourseIE(LecturioBaseIE): +class LecturioDeCourseIE(LecturioDeIE): _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, - } + }] def _real_extract(self, url): display_id = self._match_id(url) From 8dcf87c769c4ea18b9318c425351f289fe956227 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:12:46 +0000 Subject: [PATCH 3/7] Fixes --- yt_dlp/extractor/lecturio.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 08f45cca6a..9ee864731c 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -17,9 +17,7 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' - _NETRC_MACHINE = 'lecturio' - _CC_LANGS = { 'Arabic': 'ar', 'Bulgarian': 'bg', @@ -146,7 +144,6 @@ class LecturioBaseIE(InfoExtractor): class LecturioIE(LecturioBaseIE): _VALID_URL = r'https?://app\.lecturio\.com/([^/?#]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))' - _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': '9a42cf1d8282a6311bf7211bbde26fde', @@ -161,15 +158,12 @@ class LecturioIE(LecturioBaseIE): 'only_matching': True, }] - -class LecturioDeIE(LecturioIE): - _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P[^/?#&]+)\.vortrag' - +class LecturioDeIE(LecturioBaseIE): + _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P)(?P[^/?#&]+)\.vortrag' _TESTS = [{ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, }] - _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 'https://www.lecturio.de/anmelden.html' @@ -220,7 +214,6 @@ class LecturioDeCourseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) entries = [] @@ -230,7 +223,7 @@ class LecturioDeCourseIE(InfoExtractor): lecture_url = urljoin(url, mobj.group('url')) lecture_id = mobj.group('id') entries.append(self.url_result( - lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) + lecture_url, LecturioDeIE, video_id=lecture_id)) title = self._search_regex( r']*>([^<]+)', webpage, 'title', default=None) From 9d8933d4d7813e9d4947c1e22f71203746936018 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:13:13 +0000 Subject: [PATCH 4/7] cleanup --- yt_dlp/extractor/lecturio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 9ee864731c..4927641995 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -33,7 +33,6 @@ class LecturioBaseIE(InfoExtractor): } def _perform_login(self, username, password): - # Sets some cookies _, urlh = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login popup') From 111dd1b4a896d4f8e811dccce2ee0715adc30654 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:14:17 +0000 Subject: [PATCH 5/7] tests formatting --- yt_dlp/extractor/lecturio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 4927641995..5e59493c63 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -206,7 +206,7 @@ class LecturioCourseIE(LecturioBaseIE): class LecturioDeCourseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/?#]+/(?P[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, }] From 0b6d13c28978b38bca44a04fa3df015fbac969d9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:15:43 +0000 Subject: [PATCH 6/7] formatting --- yt_dlp/extractor/lecturio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 5e59493c63..e88a21d515 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -157,6 +157,7 @@ class LecturioIE(LecturioBaseIE): 'only_matching': True, }] + class LecturioDeIE(LecturioBaseIE): _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P)(?P[^/?#&]+)\.vortrag' _TESTS = [{ @@ -213,6 +214,7 @@ class LecturioDeCourseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) entries = [] From a20320f816006b1cff1997db4a39c4c4816785ee Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:46:52 +0000 Subject: [PATCH 7/7] cause merge conflict --- yt_dlp/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index a90b288c9a..13100a5a6b 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,6 +1,6 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.05.27' +__version__ = '2024.05.27' # cause merge conflict RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b'