diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py
index 3fdfb5ff4e..63ed9515aa 100644
--- a/yt_dlp/extractor/blackboardcollaborate.py
+++ b/yt_dlp/extractor/blackboardcollaborate.py
@@ -1,3 +1,5 @@
+import urllib.parse
+
 from .common import InfoExtractor
 from ..utils import (
     UnsupportedError,
@@ -13,6 +15,7 @@ from ..utils import (
     unsmuggle_url,
     url_or_none,
     urlencode_postdata,
+    value,
 )
 from ..utils.traversal import traverse_obj
 
@@ -184,7 +187,7 @@ class BlackboardCollaborateLaunchIE(InfoExtractor):
 
 
 class BlackboardClassCollaborateIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<region>[a-z]+)-lti\.bbcollab\.com/collab/ui/scheduler/lti\?token=[\w%\.\-=]+'
+    _VALID_URL = r'https?://(?P<region>[a-z]+)-lti\.bbcollab\.com/collab/ui/scheduler/lti'
 
     _TESTS = [
         {
@@ -193,13 +196,22 @@ class BlackboardClassCollaborateIE(InfoExtractor):
         },
     ]
 
-    def _call_api(self, region, video_id='', api_call='', token=None, note='Downloading JSON metadata', fatal=True):
-        if video_id == '':
-            channel_id = jwt_decode_hs256(token)['context']
-
-        return self._download_json(f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id + api_call}',
-                                   video_id or channel_id, note=note,
-                                   headers={'Authorization': f'Bearer {token}'} if token else '', fatal=fatal)
+    def _call_api(self, region, video_id=None, path=None, token=None, note='Downloading JSON metadata', fatal=True):
+        """Fetch JSON from the region's recordings API endpoint, optionally scoped to video_id/path."""
+        return self._download_json(
+            f'https://{region}.bbcollab.com/collab/api/csa/recordings/{join_nonempty(video_id, path, delim="/")}',
+            video_id or jwt_decode_hs256(token)['context'], note=note, fatal=fatal,
+            headers={'Authorization': f'Bearer {token}'} if token else None)
+
+    def _entries(self, data, region, token):
+        """Yield one url_result per item of data['results'], using the 'url' API call for its target."""
+        for item in traverse_obj(data, ('results', ...)):
+            yield self.url_result(self._call_api(region, item['id'], 'url', token)['url'], BlackboardCollaborateLaunchIE, **traverse_obj(item, {
+                'id': ('id', {str_or_none}),
+                'view_count': ('playbackCount', {int_or_none}),
+                'duration': ('duration', {int_or_none(scale=1000)}),
+                'availability': ('publicLinkAllowed', {self._parse_availability}),
+            }))
 
     def _parse_availability(self, public_link_allowed):
         if public_link_allowed:
@@ -210,45 +222,37 @@ class BlackboardClassCollaborateIE(InfoExtractor):
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
         region = self._match_valid_url(url)['region']
-        token = parse_qs(url).get('token')
+        token = traverse_obj(parse_qs(url), ('token', -1))
+        if not token:
+            raise UnsupportedError(url)
 
         playlist_info = self._call_api(region, token=token, note='Downloading playlist information')
-        entries = traverse_obj(playlist_info, ('results', ..., {
-            'id': ('id', {str_or_none}),
-            'view_count': ('playbackCount', {int_or_none}),
-            'duration': ('duration', {int_or_none(scale=1000)}),
-            'availability': ('publicLinkAllowed', {self._parse_availability}),
-        }))
-
-        for i in entries:
-            i['_type'] = 'url'
-            i['url'] = self._call_api(region, i['id'], '/url', token)['url']
-            i['ie_key'] = BlackboardCollaborateLaunchIE.ie_key()
 
         return self.playlist_result(
-            entries,
-            playlist_count=playlist_info['size'],
-            title=data.get('title'),
-            alt_title=data.get('alt_title'),
-            description=data.get('description'),
-            modified_timestamp=data.get('modified_timestamp'),
-            channel_id=data.get('channel_id'))
+            entries=self._entries(playlist_info, region, token),
+            playlist_count=playlist_info.get('size'),
+            **data,
+        )
 
 
 class BlackboardCollaborateUltraSingleCourseIE(InfoExtractor):
-    # Support format of either host/webapps/collab-ultra/tool/collabultra?course_id=course_id or
-    # host/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=course_id
-    # host/webapps/blackboard/execute/courseMain?course_id=course_id
-    # host/ultra/courses/course_id/cl/outline
+    """
+    Match various URL formats including:
+    * host/webapps/collab-ultra/tool/collabultra?course_id=course_id
+    * host/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=course_id
+    * host/webapps/blackboard/execute/courseMain?course_id=course_id
+    * host/ultra/courses/course_id/cl/outline
+    * host/webapps/blackboard/execute/announcement?course_id=course_id
+    * host/webapps/blackboard/content/listContent.jsp?course_id=course_id
+    """
     _VALID_URL = r'''(?x)
-        https://(?P<host>[\w\.]+)/(?:
+        https://[\w\.]+/(?:
             (?:webapps/
-            (?:collab-ultra/tool/collabultra(?:/lti/launch)?
-            |blackboard/execute/courseMain)
-            \?[\w\d_=&]*course_id=(?P<course_id>[\d_]+))
-            |(?:ultra/courses/(?P<course_id2>[\d_]+)/cl/outline))'''
+                (?:collab-ultra/tool/collabultra(?:/lti/launch)?
+                |blackboard/(?:execute/(?:courseMain|announcement)|content/listContent\.jsp)))
+            |(?:ultra/courses/(?P<course_id>[\d_]+)/cl/outline))'''
 
-    _TESTS = [  # All Require a login
+    _TESTS = [
         {
             'url': 'https://umb.umassonline.net/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=_70544_1',
             'only_matching': True,
@@ -265,36 +269,52 @@ class BlackboardCollaborateUltraSingleCourseIE(InfoExtractor):
             'url': 'https://blackboard.salford.ac.uk/ultra/courses/_175809_1/cl/outline',
             'only_matching': True,
         },
+        {
+            'url': 'https://blackboard.example.com/webapps/blackboard/execute/announcement?method=search&context=course_entry&course_id=_123456_1',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://vuws.westernsydney.edu.au/webapps/blackboard/content/listContent.jsp?course_id=_41005_1&content_id=_7747469_1',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        course_id = mobj.group('course_id') or mobj.group('course_id2')
-        host = mobj.group('host')
+        course_id = traverse_obj(parse_qs(url), ('course_id', -1)) or self._match_valid_url(url)['course_id']
+        if not course_id:
+            raise UnsupportedError(url)
+        host = urllib.parse.urlparse(url).hostname
 
         course_data = self._download_webpage(
-            f'https://{host}/webapps/collab-ultra/tool/collabultra/lti/launch?course_id={course_id}', course_id, 'Downloading course data')
+            f'https://{host}/webapps/collab-ultra/tool/collabultra/lti/launch?course_id={course_id}',
+            course_id, note='Downloading course data')
 
         attrs = self._hidden_inputs(course_data)
         endpoint = self._html_search_regex(r'<form[^>]+action="([^"]+)"', course_data, 'form_action')
-
         redirect_url = self._request_webpage(endpoint, course_id, 'Getting authentication token', data=urlencode_postdata(attrs)).url
-        course_info = self._download_json(f'https://{host}/learn/api/v1/courses/{course_id}',
-                                          course_id, 'Downloading extra metadata', fatal=False)
+        # Ref: https://developer.blackboard.com/portal/displayApi
+        course_info = self._download_json(f'https://{host}/learn/api/v1/courses/{course_id}', course_id,
+                                          note='Downloading extra metadata', fatal=False)
 
-        return self.url_result(smuggle_url(redirect_url, {
-            'title': course_info.get('displayName'),
-            'alt_title': course_info.get('displayId'),  # Could also use courseId
-            'description': course_info.get('description'),
-            'modified_timestamp': parse_iso8601(course_info.get('modifiedDate')),
-            'channel_id': course_id,
-        }), ie=BlackboardClassCollaborateIE.ie_key(), video_id=None)
+        if self.suitable(redirect_url):
+            raise UnsupportedError(redirect_url)
+
+        return self.url_result(smuggle_url(
+            redirect_url,
+            traverse_obj(course_info, {
+                'title': (('name', 'displayName'), {str}, any),
+                'id': (('id', 'displayId', 'courseId'), {str}, any),
+                'url': (('guestAccessUrl', 'externalAccessUrl', {value(join_nonempty('https://', host, traverse_obj(course_info, 'homePageUrl'), delim=''))}), {url_or_none}, any),
+                'description': ('description', {str}),
+                'modified_timestamp': ('modifiedDate', {parse_iso8601}),
+            }),
+        ), ie=BlackboardClassCollaborateIE, video_id=None)
 
 
 class BlackboardCollaborateUltraAllCoursesIE(InfoExtractor):
-    _VALID_URL = r'https://(?P<host>[\w\.]+)/ultra/institution-page'
+    _VALID_URL = r'https://[\w\.]+/ultra/institution-page'
 
-    _TESTS = [  # All Require a login
+    _TESTS = [
         {
             'url': 'https://umb.umassonline.net/ultra/institution-page',
             'only_matching': True,
@@ -315,27 +335,41 @@ class BlackboardCollaborateUltraAllCoursesIE(InfoExtractor):
         },
     ]
 
+    def _download_course_list(self, host, offset):
+        """Download one page of the users/me/memberships listing from host, starting at offset."""
+        # Ref: https://developer.blackboard.com/portal/displayApi
+        return self._download_json(
+            f'https://{host}/learn/api/v1/users/me/memberships?fields=course&includeCount=true&offset={offset}',
+            video_id=None, note='Finding courses')
+
+    def _entries(self, data, host):
+        """Yield a url_result for each 'course' object in data whose 'isAvailable' flag is truthy."""
+        for item in traverse_obj(data, (..., 'course')):
+            if item.get('isAvailable'):
+                yield self.url_result(
+                    ie=BlackboardCollaborateUltraSingleCourseIE,
+                    **traverse_obj(item, {
+                        'id': ('id', {str}, any),
+                        'display_id': (('courseId', 'displayId'), {str}, any),
+                        'description': ('description', {str}),
+                        'title': (('displayName', 'name'), {str}, any),
+                        'availability': ('isAllowGuests', {bool}, any),
+                        'url': (('guestAccessUrl', 'externalAccessUrl',
+                                 {value(join_nonempty('https://', host, item.get('homePageUrl'), delim=''))}),
+                                {url_or_none}, any),
+                    }))
+
     def _real_extract(self, url):
-        host = self._match_valid_url(url)['host']
-        endpoint = f'https://{host}/learn/api/v1/users/me/memberships?fields=course&includeCount=true'
-        number_of_courses, courses_found, user_id, entries = 1, 0, None, []
+        host = urllib.parse.urlparse(url).hostname
+        first_page = self._download_course_list(host, 0)
+        results = first_page['results']
+        number_of_courses = traverse_obj(first_page, ('paging', 'count', {int_or_none})) or 0
+        courses_found = len(results)
 
-        # Number of results per page seems to depend on the host and while it can be changed by '&limit=', each host seems to have a different upperbound, so a loop might be better
         while number_of_courses > courses_found:
-            current_page = self._download_json(f'{endpoint}&offset={courses_found}', user_id,
-                                               'Finding courses')
-            number_of_courses = traverse_obj(current_page, ('paging', 'count'))
-            user_id = traverse_obj(current_page, ('results', '0', 'userId'))
+            current_page = self._download_course_list(host, courses_found)
+            if not current_page['results']:
+                break
+            results.extend(current_page['results'])
             courses_found += len(current_page['results'])
-            for current_course in traverse_obj(current_page, ('results', ..., 'course')):
-                if current_course['isAvailable']:
-                    entries.append({
-                        'id': current_course.get('id'),
-                        'title': current_course.get('displayName'),
-                        'alt_title': current_course.get('displayId'),
-                        '_type': 'url',
-                        'url': current_course.get('externalAccessUrl') or f"{host}/{current_course.get('homePageUrl')}",
-                        'ie_key': BlackboardCollaborateUltraSingleCourseIE.ie_key(),
-                    })
-
-        return self.playlist_result(entries)
+        return self.playlist_result(self._entries(results, host))