diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index ad7ab27e28..09f957394e 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -5,7 +5,9 @@ import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, + determine_ext, float_or_none, + join_nonempty, mimetype2ext, smuggle_url, str_or_none, @@ -268,7 +270,33 @@ class MediasiteIE(InfoExtractor): formats.extend(stream_formats) # XXX: Presentation['Presenters'] - # XXX: Presentation['Transcript'] + transcripts = presentation.get('Transcripts', []) + captions, subtitles = {}, {} + for transcript in transcripts: + lang_code = traverse_obj( + transcript, (('DetailedLanguageCode', 'LanguageCode'), {str}), get_all=False) or 'und' + lang_name = transcript.get('Language') + t = { + 'url': transcript.get('CaptionsUrl'), + 'name': lang_name, + } + if 'Auto-Generated' in lang_name: + captions.setdefault(lang_code, []).append(t) + else: + subtitles.setdefault(lang_code, []).append(t) + if transcript_url := url_or_none(presentation.get('TranscriptUrl')): + if 'playbackTicket=' not in transcript_url: + transcript_url = join_nonempty( + transcript_url, traverse_obj(presentation, ('Streams', 0, 'SlidePlaybackTicketId', {str})), + delim='?playbackTicket=') + if determine_ext(transcript_url) != 'txt': + if len(transcripts) == 1: + (captions or subtitles)[lang_code].insert(0, { + 'url': transcript_url, + 'name': lang_name, + }) + else: + subtitles.setdefault('und', []).insert(0, {'url': transcript_url}) return { 'id': resource_id, @@ -277,6 +305,8 @@ class MediasiteIE(InfoExtractor): 'duration': float_or_none(presentation.get('Duration'), 1000), 'timestamp': float_or_none(presentation.get('UnixTime'), 1000), 'formats': formats, + 'automatic_captions': captions, + 'subtitles': subtitles, 'thumbnails': thumbnails, }