[extractor/common, downloader/ism] Extract SSTR subtitle tracks

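_parse_ism_formats was extended into _parse_ism_formats_and_subtitles,
and _extract_ism_formats into _extract_ism_formats_and_subtitles; both
new methods return a (formats, subtitles) pair. All direct users were
updated, though _extract_ism_formats was left as a compatibility
wrapper that returns only the formats and warns when subtitle tracks
are discarded.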

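The SSTR downloader was also modified to prepare for muxing subtitle
streams: write_piff_header now reads an explicit stream_type parameter
instead of inferring audio from a zero width and height, and it writes
subtitle handler and media header boxes for text streams. No support
for any subtitle codecs was added in this commit, so building a sample
entry for a text stream still ends in a failed assertion.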
pull/310/head
Felix S 4 years ago
parent 171e59edd4
commit fd76a14259

@ -48,7 +48,7 @@ def write_piff_header(stream, params):
language = params.get('language', 'und') language = params.get('language', 'und')
height = params.get('height', 0) height = params.get('height', 0)
width = params.get('width', 0) width = params.get('width', 0)
is_audio = width == 0 and height == 0 stream_type = params['stream_type']
creation_time = modification_time = int(time.time()) creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand ftyp_payload = b'isml' # major brand
@ -77,7 +77,7 @@ def write_piff_header(stream, params):
tkhd_payload += u32.pack(0) * 2 # reserved tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group tkhd_payload += s16.pack(0) # alternate group
tkhd_payload += s88.pack(1 if is_audio else 0) # volume tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
tkhd_payload += u16.pack(0) # reserved tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width) tkhd_payload += u1616.pack(width)
@ -93,19 +93,35 @@ def write_piff_header(stream, params):
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined hdlr_payload = u32.pack(0) # pre defined
hdlr_payload += b'soun' if is_audio else b'vide' # handler type if stream_type == 'audio': # handler type
hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'soun'
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'SoundHandler\0' # name
elif stream_type == 'video':
hdlr_payload += b'vide'
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'VideoHandler\0' # name
elif stream_type == 'text':
hdlr_payload += b'subt'
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'SubtitleHandler\0' # name
else:
assert False
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
if is_audio: if stream_type == 'audio':
smhd_payload = s88.pack(0) # balance smhd_payload = s88.pack(0) # balance
smhd_payload += u16.pack(0) # reserved smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else: elif stream_type == 'video':
vmhd_payload = u16.pack(0) # graphics mode vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
elif stream_type == 'text':
sthd_payload = u16.pack(0) * 2
media_header_box = full_box(b'sthd', 0, 1, sthd_payload) # Subtitle Media Header
else:
assert False
minf_payload = media_header_box minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count dref_payload = u32.pack(1) # entry count
@ -117,7 +133,7 @@ def write_piff_header(stream, params):
sample_entry_payload = u8.pack(0) * 6 # reserved sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index sample_entry_payload += u16.pack(1) # data reference index
if is_audio: if stream_type == 'audio':
sample_entry_payload += u32.pack(0) * 2 # reserved sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2)) sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@ -127,7 +143,7 @@ def write_piff_header(stream, params):
if fourcc == 'AACL': if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload) sample_entry_box = box(b'mp4a', sample_entry_payload)
else: elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined sample_entry_payload += u32.pack(0) * 3 # pre defined
@ -155,6 +171,10 @@ def write_piff_header(stream, params):
avcc_payload += pps avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
else:
assert False
else:
assert False
stsd_payload += sample_entry_box stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box

@ -2750,26 +2750,38 @@ class InfoExtractor(object):
else: else:
# Assuming direct URL to unfragmented media. # Assuming direct URL to unfragmented media.
f['url'] = base_url f['url'] = base_url
formats.append(f) if content_type in ('video', 'audio'):
formats.append(f)
elif content_type == 'text':
subtitles.setdefault(lang or 'und', []).append(f)
else: else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats, subtitles return formats, subtitles
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
if subs:
self.report_warning(bug_reports_message(
"Ignoring subtitle tracks found in the ISM manifest; "
"if any subtitle tracks are missing,"
))
return fmts
def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle( res = self._download_xml_handle(
ism_url, video_id, ism_url, video_id,
note=note or 'Downloading ISM manifest', note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest', errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal, data=data, headers=headers, query=query) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [] return [], {}
ism_doc, urlh = res ism_doc, urlh = res
if ism_doc is None: if ism_doc is None:
return [] return [], {}
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
""" """
Parse formats from ISM manifest. Parse formats from ISM manifest.
References: References:
@ -2777,22 +2789,24 @@ class InfoExtractor(object):
https://msdn.microsoft.com/en-us/library/ff469518.aspx https://msdn.microsoft.com/en-us/library/ff469518.aspx
""" """
if ism_doc.get('IsLive') == 'TRUE': if ism_doc.get('IsLive') == 'TRUE':
return [] return [], {}
if (not self._downloader.params.get('allow_unplayable_formats') if (not self._downloader.params.get('allow_unplayable_formats')
and ism_doc.find('Protection') is not None): and ism_doc.find('Protection') is not None):
return [] return [], {}
duration = int(ism_doc.attrib['Duration']) duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = [] formats = []
subtitles = {}
for stream in ism_doc.findall('StreamIndex'): for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type') stream_type = stream.get('Type')
if stream_type not in ('video', 'audio'): if stream_type not in ('video', 'audio', 'text'):
continue continue
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None) fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP # TODO: add support for WVC1 and WMAP
@ -2839,33 +2853,52 @@ class InfoExtractor(object):
format_id.append(stream_name) format_id.append(stream_name)
format_id.append(compat_str(tbr)) format_id.append(compat_str(tbr))
formats.append({ if stream_type == 'text':
'format_id': '-'.join(format_id), subtitles.setdefault(stream_language, []).append({
'url': ism_url, 'ext': 'ismt',
'manifest_url': ism_url, 'protocol': 'ism',
'ext': 'ismv' if stream_type == 'video' else 'isma', 'url': ism_url,
'width': width, 'manifest_url': ism_url,
'height': height, 'fragments': fragments,
'tbr': tbr, '_download_params': {
'asr': sampling_rate, 'stream_type': stream_type,
'vcodec': 'none' if stream_type == 'audio' else fourcc, 'duration': duration,
'acodec': 'none' if stream_type == 'video' else fourcc, 'timescale': stream_timescale,
'protocol': 'ism', 'fourcc': fourcc,
'fragments': fragments, 'language': stream_language,
'_download_params': { 'codec_private_data': track.get('CodecPrivateData'),
'duration': duration, }
'timescale': stream_timescale, })
'width': width or 0, elif stream_type in ('video', 'audio'):
'height': height or 0, formats.append({
'fourcc': fourcc, 'format_id': '-'.join(format_id),
'codec_private_data': track.get('CodecPrivateData'), 'url': ism_url,
'sampling_rate': sampling_rate, 'manifest_url': ism_url,
'channels': int_or_none(track.get('Channels', 2)), 'ext': 'ismv' if stream_type == 'video' else 'isma',
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), 'width': width,
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), 'height': height,
}, 'tbr': tbr,
}) 'asr': sampling_rate,
return formats 'vcodec': 'none' if stream_type == 'audio' else fourcc,
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'_download_params': {
'stream_type': stream_type,
'duration': duration,
'timescale': stream_timescale,
'width': width or 0,
'height': height or 0,
'fourcc': fourcc,
'language': stream_language,
'codec_private_data': track.get('CodecPrivateData'),
'sampling_rate': sampling_rate,
'channels': int_or_none(track.get('Channels', 2)),
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
},
})
return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None): def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url): def absolute_url(item_url):

Loading…
Cancel
Save