[ie/blackboardcollaborate] Support more URl formats and cleanup ClassCollaborate and CollaborateUltra extractors

pull/13760/head
flanter21 1 month ago
parent b4a3632666
commit 6f9fae2337

@ -1,3 +1,5 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
UnsupportedError,
@ -13,6 +15,7 @@ from ..utils import (
unsmuggle_url,
url_or_none,
urlencode_postdata,
value,
)
from ..utils.traversal import traverse_obj
@ -184,7 +187,7 @@ class BlackboardCollaborateLaunchIE(InfoExtractor):
class BlackboardClassCollaborateIE(InfoExtractor):
_VALID_URL = r'https?://(?P<region>[a-z]+)-lti\.bbcollab\.com/collab/ui/scheduler/lti\?token=[\w%\.\-=]+'
_VALID_URL = r'https?://(?P<region>[a-z]+)-lti\.bbcollab\.com/collab/ui/scheduler/lti'
_TESTS = [
{
@ -193,13 +196,20 @@ class BlackboardClassCollaborateIE(InfoExtractor):
},
]
def _call_api(self, region, video_id='', api_call='', token=None, note='Downloading JSON metadata', fatal=True):
if video_id == '':
channel_id = jwt_decode_hs256(token)['context']
return self._download_json(f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id + api_call}',
video_id or channel_id, note=note,
headers={'Authorization': f'Bearer {token}'} if token else '', fatal=fatal)
def _call_api(self, region, video_id=None, path=None, token=None, note='Downloading JSON metadata', fatal=True):
return self._download_json(
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{join_nonempty(video_id, path, delim="/")}',
video_id or jwt_decode_hs256(token)['context'], note=note, fatal=fatal,
headers={'Authorization': f'Bearer {token}'} if token else '')
def _entries(self, data, region, token):
for item in traverse_obj(data, ('results', ...)):
yield self.url_result(self._call_api(region, item['id'], 'url', token)['url'], BlackboardCollaborateLaunchIE, **traverse_obj(item, {
'id': ('id', {str_or_none}),
'view_count': ('playbackCount', {int_or_none}),
'duration': ('duration', {int_or_none(scale=1000)}),
'availability': ('publicLinkAllowed', {self._parse_availability}),
}))
def _parse_availability(self, public_link_allowed):
if public_link_allowed:
@ -210,45 +220,35 @@ class BlackboardClassCollaborateIE(InfoExtractor):
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
region = self._match_valid_url(url)['region']
token = parse_qs(url).get('token')
token = parse_qs(url).get('token')[-1]
playlist_info = self._call_api(region, token=token, note='Downloading playlist information')
entries = traverse_obj(playlist_info, ('results', ..., {
'id': ('id', {str_or_none}),
'view_count': ('playbackCount', {int_or_none}),
'duration': ('duration', {int_or_none(scale=1000)}),
'availability': ('publicLinkAllowed', {self._parse_availability}),
}))
for i in entries:
i['_type'] = 'url'
i['url'] = self._call_api(region, i['id'], '/url', token)['url']
i['ie_key'] = BlackboardCollaborateLaunchIE.ie_key()
return self.playlist_result(
entries,
playlist_count=playlist_info['size'],
title=data.get('title'),
alt_title=data.get('alt_title'),
description=data.get('description'),
modified_timestamp=data.get('modified_timestamp'),
channel_id=data.get('channel_id'))
entries=self._entries(playlist_info, region, token),
playlist_count=playlist_info.get('size'),
**data,
)
class BlackboardCollaborateUltraSingleCourseIE(InfoExtractor):
# Support format of either host/webapps/collab-ultra/tool/collabultra?course_id=course_id or
# host/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=course_id
# host/webapps/blackboard/execute/courseMain?course_id=course_id
# host/ultra/courses/course_id/cl/outline
"""
Match various URL formats including:
* host/webapps/collab-ultra/tool/collabultra?course_id=course_id or
* host/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=course_id
* host/webapps/blackboard/execute/courseMain?course_id=course_id
* host/ultra/courses/course_id/cl/outline
* host/webapps/blackboard/execute/announcement?course_id=course_id
* host/webapps/blackboard/content/listContent.jsp?course_id=course_id
"""
_VALID_URL = r'''(?x)
https://(?P<host>[\w\.]+)/(?:
https://[\w\.]+/(?:
(?:webapps/
(?:collab-ultra/tool/collabultra(?:/lti/launch)?
|blackboard/execute/courseMain)
\?[\w\d_=&]*course_id=(?P<course_id>[\d_]+))
|(?:ultra/courses/(?P<course_id2>[\d_]+)/cl/outline))'''
(?:collab-ultra/tool/collabultra(?:/lti/launch)?
|blackboard/(?:execute/(?:courseMain|announcement)|content/listContent.jsp)))
|(?:ultra/courses/(?P<course_id>[\d_]+)/cl/outline))'''
_TESTS = [ # All Require a login
_TESTS = [
{
'url': 'https://umb.umassonline.net/webapps/collab-ultra/tool/collabultra/lti/launch?course_id=_70544_1',
'only_matching': True,
@ -265,38 +265,52 @@ class BlackboardCollaborateUltraSingleCourseIE(InfoExtractor):
'url': 'https://blackboard.salford.ac.uk/ultra/courses/_175809_1/cl/outline',
'only_matching': True,
},
{
'url': 'https://blackboard.example.com/webapps/blackboard/execute/announcement?method=search&context=course_entry&course_id=_123456_1',
'only_matching': True,
},
{
'url': 'https://vuws.westernsydney.edu.au/webapps/blackboard/content/listContent.jsp?course_id= _41005_1&content_id=_7747469_1',
'only_matching': True,
},
]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
course_id = mobj.group('course_id') or mobj.group('course_id2')
host = mobj.group('host')
course_id = parse_qs(url).get('course_id')[-1] or self._match_valid_url(url)['course_id']
host = urllib.parse.urlparse(url).hostname
course_data = self._download_webpage(
f'https://{host}/webapps/collab-ultra/tool/collabultra/lti/launch?course_id={course_id}', course_id, 'Downloading course data')
f'https://{host}/webapps/collab-ultra/tool/collabultra/lti/launch?course_id={course_id}',
course_id, note='Downloading course data')
attrs = self._hidden_inputs(course_data)
endpoint = self._html_search_regex(r'<form[^>]+action="([^"]+)"', course_data, 'form_action')
redirect_url = self._request_webpage(endpoint, course_id, 'Getting authentication token',
data=urlencode_postdata(attrs)).url
course_info = self._download_json(f'https://{host}/learn/api/v1/courses/{course_id}',
course_id, 'Downloading extra metadata', fatal=False)
# Ref: https://developer.blackboard.com/portal/displayApi
course_info = self._download_json(f'https://{host}/learn/api/v1/courses/{course_id}', course_id,
note='Downloading extra metadata', fatal=False)
return self.url_result(smuggle_url(redirect_url, {
'title': course_info.get('displayName'),
'alt_title': course_info.get('displayId'), # Could also use courseId
'description': course_info.get('description'),
'modified_timestamp': parse_iso8601(course_info.get('modifiedDate')),
'channel_id': course_id,
}), ie=BlackboardClassCollaborateIE.ie_key(), video_id=None)
if self.suitable(redirect_url):
raise UnsupportedError(redirect_url)
return self.url_result(smuggle_url(
redirect_url,
traverse_obj(course_info, {
'title': (('name', 'displayName'), {str}, any),
'id': (('id', 'displayId', 'courseId'), {str}, any),
'url': (('guestAccessUrl', 'externalAccessUrl', {value(join_nonempty('https://', host, course_info['homePageUrl'], delim=''))}), {url_or_none}, any),
'description': ('description', {str}),
'modified_timestamp': ('modifiedDate', {parse_iso8601}),
}),
), ie=BlackboardClassCollaborateIE, video_id=None)
class BlackboardCollaborateUltraAllCoursesIE(InfoExtractor):
_VALID_URL = r'https://(?P<host>[\w\.]+)/ultra/institution-page'
_VALID_URL = r'https://[\w\.]+/ultra/institution-page'
_TESTS = [ # All Require a login
_TESTS = [
{
'url': 'https://umb.umassonline.net/ultra/institution-page',
'only_matching': True,
@ -315,28 +329,38 @@ class BlackboardCollaborateUltraAllCoursesIE(InfoExtractor):
},
]
def _download_course_list(self, host, offset):
# Ref: https://developer.blackboard.com/portal/displayApi
return self._download_json(
f'https://{host}/learn/api/v1/users/me/memberships?fields=course&includeCount=true&offset={offset}',
video_id=None, note='Finding courses')
def _entries(self, data, host):
for item in traverse_obj(data, (..., 'course')):
if item['isAvailable']:
yield self.url_result(
ie=BlackboardCollaborateUltraSingleCourseIE,
**traverse_obj(item, {
'id': ('id', {str}, any),
'display_id': ('courseId', 'displayId', {str}, any),
'description': ('description', {str}),
'title': ('displayName', 'name', {str}, any),
'availability': ('isAllowGuests', {bool}, any),
'url': ('guestAccessUrl', 'externalAccessUrl',
{value(join_nonempty('https://', host, item['homePageUrl'], delim=''))},
{url_or_none}, any),
}))
def _real_extract(self, url):
host = self._match_valid_url(url)['host']
endpoint = f'https://{host}/learn/api/v1/users/me/memberships?fields=course&includeCount=true'
number_of_courses, courses_found, user_id, entries = 1, 0, None, []
host = urllib.parse.urlparse(url).hostname
first_page = self._download_course_list(host, 0)
results = first_page['results']
number_of_courses = traverse_obj(first_page, ('paging', 'count'))
courses_found = number_of_courses
# Number of results per page seems to depend on the host and while it can be changed by '&limit=', each host seems to have a different upperbound, so a loop might be better
while number_of_courses > courses_found:
current_page = self._download_json(f'{endpoint}&offset={courses_found}', user_id,
'Finding courses')
number_of_courses = traverse_obj(current_page, ('paging', 'count'))
user_id = traverse_obj(current_page, ('results', '0', 'userId'))
current_page = self._download_course_list(host, number_of_courses)
results.append(*current_page['results'])
courses_found += len(current_page['results'])
for current_course in traverse_obj(current_page, ('results', ..., 'course')):
if current_course['isAvailable']:
entries.append({
'id': current_course.get('id'),
'title': current_course.get('displayName'),
'alt_title': current_course.get('displayId'),
'_type': 'url',
'url': current_course.get('externalAccessUrl') or f"{host}/{current_course.get('homePageUrl')}",
'ie_key': BlackboardCollaborateUltraSingleCourseIE.ie_key(),
})
return self.playlist_result(entries)
return self.playlist_result(self._entries(results, host))

Loading…
Cancel
Save