From 29f63c96720caa4272ad79aaedc4d436e4a7976a Mon Sep 17 00:00:00 2001 From: Mevious Date: Tue, 22 Jun 2021 18:57:53 -0700 Subject: [PATCH] [funimation] Extract subtitles (#434) Closes #420, https://github.com/ytdl-org/youtube-dl/issues/25645 Related: https://github.com/ytdl-org/youtube-dl/pull/24906 Authored by: Mevious --- yt_dlp/extractor/funimation.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index d8f1e169a..9ec1627fa 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -10,8 +10,9 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, + urlencode_postdata, + urljoin, ExtractorError, - urlencode_postdata ) @@ -109,6 +110,7 @@ class FunimationIE(InfoExtractor): if series: title = '%s - %s' % (series, title) description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) + subtitles = self.extract_subtitles(url, video_id, display_id) try: headers = {} @@ -153,6 +155,24 @@ class FunimationIE(InfoExtractor): 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), 'episode_number': int_or_none(title_data.get('episodeNum')), 'episode': episode, + 'subtitles': subtitles, 'season_id': title_data.get('seriesId'), 'formats': formats, } + + def _get_subtitles(self, url, video_id, display_id): + player_url = urljoin(url, '/player/' + video_id) + player_page = self._download_webpage(player_url, display_id) + text_tracks_json_string = self._search_regex( + r'"textTracks": (\[{.+?}\])', + player_page, 'subtitles data', default='') + text_tracks = self._parse_json( + text_tracks_json_string, display_id, js_to_json, fatal=False) or [] + subtitles = {} + for text_track in text_tracks: + url_element = {'url': text_track.get('src')} + language = text_track.get('language') + if text_track.get('type') == 'CC': + language += '_CC' + subtitles.setdefault(language, []).append(url_element) + return subtitles