From a0c3b2d5cf4fe374a2b0119ea53b71c9a06aaee9 Mon Sep 17 00:00:00 2001 From: Felix S Date: Mon, 7 Nov 2016 15:45:42 +0100 Subject: [PATCH] [extractor/common] Extract HLS subtitle tracks _extract_m3u8_formats is renamed to _extract_m3u8_formats_and_subtitles and extended to handle subtitle tracks instead of skipping them; a wrapper with the old name is provided for compatibility. _parse_m3u8_formats is likewise renamed and extended, but without adding the compatibility wrapper; the test suite is adjusted to test the enhanced method instead. --- test/test_InfoExtractor.py | 8 +++--- yt_dlp/extractor/common.py | 53 +++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a08616694..9e059723f 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -684,17 +684,19 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', - }] + }], + {} ), ] - for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: + for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, mode='r', encoding='utf-8') as f: - formats = self.ie._parse_m3u8_formats( + formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + expect_value(self, subs, expected_subs, None) def test_parse_mpd_formats(self): _TEST_CASES = [ diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ee8a54b66..c67fb7bbf 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1879,11 +1879,21 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', } - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None, quality=None, - m3u8_id=None, note=None, errnote=None, - fatal=True, live=False, data=None, headers={}, - query={}): + def _extract_m3u8_formats(self, *args, **kwargs): + fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs) + if subs: + self.report_warning(bug_reports_message( + "Ignoring subtitle tracks found in the HLS manifest; " + "if any subtitle tracks are missing," + )) + return fmts + + def _extract_m3u8_formats_and_subtitles( + self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', + preference=None, quality=None, m3u8_id=None, note=None, + errnote=None, fatal=True, live=False, data=None, headers={}, + query={}): + res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -1891,30 +1901,34 @@ class InfoExtractor(object): fatal=fatal, data=data, headers=headers, query=query) if res is False: - return [] + return [], {} m3u8_doc, urlh = res m3u8_url = urlh.geturl() - return self._parse_m3u8_formats( + return self._parse_m3u8_formats_and_subtitles( m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id) - def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None, - entry_protocol='m3u8', preference=None, quality=None, - m3u8_id=None, live=False, note=None, errnote=None, - fatal=True, data=None, headers={}, query={}, video_id=None): + def _parse_m3u8_formats_and_subtitles( + self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8', + preference=None, quality=None, m3u8_id=None, live=False, note=None, + errnote=None, fatal=True, data=None, headers={}, query={}, + video_id=None): + if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access - return [] + return [], {} if (not self._downloader.params.get('allow_unplayable_formats') and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay - return [] + return [], {} formats = [] + subtitles = {} + format_url = lambda u: ( u if re.match(r'^https?://', u) @@ -2001,7 +2015,7 @@ class InfoExtractor(object): } formats.append(f) - return formats + return formats, subtitles groups = {} last_stream_inf = {} @@ -2013,6 +2027,15 @@ class InfoExtractor(object): if not (media_type and group_id and name): return groups.setdefault(group_id, []).append(media) + # + if media_type == 'SUBTITLES': + lang = media['LANGUAGE'] # XXX: normalise? + url = format_url(media['URI']) + sub_info = { + 'url': url, + 'ext': determine_ext(url), + } + subtitles.setdefault(lang, []).append(sub_info) if media_type not in ('VIDEO', 'AUDIO'): return media_url = media.get('URI') @@ -2160,7 +2183,7 @@ class InfoExtractor(object): formats.append(http_f) last_stream_inf = {} - return formats + return formats, subtitles @staticmethod def _xpath_ns(path, namespace=None):