From 7e1cf1a48d7b3ad2810dd0c870d3cd7dbcb4906a Mon Sep 17 00:00:00 2001 From: zubearc Date: Thu, 3 Sep 2020 05:08:41 -0400 Subject: [PATCH 1/2] support new json3 captions --- youtube_dl/extractor/youtube.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b35bf03aa..bb382849f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -549,7 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } - _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3') _GEO_BYPASS = False @@ -1560,14 +1560,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response, video_id, fatal=False) if player_response: renderer = player_response['captions']['playerCaptionsTracklistRenderer'] - base_url = renderer['captionTracks'][0]['baseUrl'] - sub_lang_list = [] - for lang in renderer['translationLanguages']: - lang_code = lang.get('languageCode') - if lang_code: - sub_lang_list.append(lang_code) - return make_captions(base_url, sub_lang_list) - + caption_tracks = renderer['captionTracks'] + for caption_track in caption_tracks: + if 'kind' not in caption_track: + # not an automatic transcription + continue + base_url = caption_track['baseUrl'] + sub_lang_list = [] + for lang in renderer['translationLanguages']: + lang_code = lang.get('languageCode') + if lang_code: + sub_lang_list.append(lang_code) + return make_captions(base_url, sub_lang_list) + + self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id) + return {} # Some videos don't provide ttsurl but rather caption_tracks and # caption_translation_languages (e.g. 20LmZk1hakA) # Does not used anymore as of 22.06.2017 From 8848f8083b23a48d07fa2edb95eee8c547b542d9 Mon Sep 17 00:00:00 2001 From: zubearc Date: Sat, 5 Sep 2020 17:54:19 -0400 Subject: [PATCH 2/2] [test/youtube] update subtitle tests --- test/test_subtitles.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 17aaaf20d..d9727c579 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -64,8 +64,8 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') - self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5') + self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b') + self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769') for lang in ['fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) @@ -73,13 +73,13 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'ttml' subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54') + self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2') def test_youtube_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') + self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') def test_youtube_automatic_captions(self): self.url = '8YoUxe5ncPo' @@ -88,9 +88,15 @@ class TestYoutubeSubtitles(BaseTestSubtitles): subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) + def test_youtube_no_automatic_captions(self): + self.url = 'QRS8MkLhQmM' + self.DL.params['writeautomaticsub'] = True + subtitles = self.getSubtitles() + self.assertTrue(not subtitles) + def test_youtube_translated_subtitles(self): # This video has a subtitles track, which can be translated - self.url = 'Ky9eprVWzlI' + self.url = 'i0ZabxXmH4Y' self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles()