|
|
|
@ -1551,42 +1551,52 @@ class InfoExtractor(object):
|
|
|
|
|
|
|
|
|
|
def extract_multisegment_info(element, ms_parent_info):
|
|
|
|
|
ms_info = ms_parent_info.copy()
|
|
|
|
|
|
|
|
|
|
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
|
|
|
|
# common attributes and elements. We will only extract those relevant
|
|
|
|
|
# for us.
|
|
|
|
|
def extract_common(source):
    """Copy data shared by SegmentList and SegmentTemplate into ms_info.

    source is an element offering find() and get(). If it has a
    SegmentTimeline child with at least one S child, ms_info['s'] is
    replaced by a list of {'t', 'd', 'r'} dicts (all integers) and
    ms_info['total_number'] by the sum of (1 + r) over those entries.
    The startNumber, timescale and duration attributes of source, when
    present and non-empty, are stored as integers under 'start_number',
    'timescale' and 'segment_duration'. Anything missing from source
    leaves the corresponding ms_info entry untouched.

    Raises KeyError if an S element has no d attribute.
    """
    timeline = source.find(_add_ns('SegmentTimeline'))
    s_elements = None if timeline is None else timeline.findall(_add_ns('S'))
    if s_elements:
        ms_info['total_number'] = 0
        ms_info['s'] = []
        for s_element in s_elements:
            repeat = int(s_element.get('r', 0))
            # One segment for the S entry itself plus its repetitions
            ms_info['total_number'] += 1 + repeat
            ms_info['s'].append({
                't': int(s_element.get('t', 0)),
                # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
                'd': int(s_element.attrib['d']),
                'r': repeat,
            })

    # (XML attribute name, ms_info key) pairs, handled in this order
    attribute_to_key = (
        ('startNumber', 'start_number'),
        ('timescale', 'timescale'),
        ('duration', 'segment_duration'),
    )
    for attribute, key in attribute_to_key:
        value = source.get(attribute)
        if value:
            ms_info[key] = int(value)
|
|
|
|
|
|
|
|
|
|
def extract_Initialization(source):
    """Store the sourceURL of source's Initialization child in ms_info.

    Nothing is stored when source has no Initialization child. Raises
    KeyError when the child exists but has no sourceURL attribute.
    """
    init_element = source.find(_add_ns('Initialization'))
    if init_element is None:
        return
    ms_info['initialization_url'] = init_element.attrib['sourceURL']
|
|
|
|
|
|
|
|
|
|
segment_list = element.find(_add_ns('SegmentList'))
|
|
|
|
|
if segment_list is not None:
|
|
|
|
|
extract_common(segment_list)
|
|
|
|
|
extract_Initialization(segment_list)
|
|
|
|
|
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
|
|
|
|
if segment_urls_e:
|
|
|
|
|
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
|
|
|
|
initialization = segment_list.find(_add_ns('Initialization'))
|
|
|
|
|
if initialization is not None:
|
|
|
|
|
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
|
|
|
else:
|
|
|
|
|
segment_template = element.find(_add_ns('SegmentTemplate'))
|
|
|
|
|
if segment_template is not None:
|
|
|
|
|
start_number = segment_template.get('startNumber')
|
|
|
|
|
if start_number:
|
|
|
|
|
ms_info['start_number'] = int(start_number)
|
|
|
|
|
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
|
|
|
|
if segment_timeline is not None:
|
|
|
|
|
s_e = segment_timeline.findall(_add_ns('S'))
|
|
|
|
|
if s_e:
|
|
|
|
|
ms_info['total_number'] = 0
|
|
|
|
|
ms_info['s'] = []
|
|
|
|
|
for s in s_e:
|
|
|
|
|
r = int(s.get('r', 0))
|
|
|
|
|
ms_info['total_number'] += 1 + r
|
|
|
|
|
ms_info['s'].append({
|
|
|
|
|
't': int(s.get('t', 0)),
|
|
|
|
|
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
|
|
|
|
'd': int(s.attrib['d']),
|
|
|
|
|
'r': r,
|
|
|
|
|
})
|
|
|
|
|
else:
|
|
|
|
|
timescale = segment_template.get('timescale')
|
|
|
|
|
if timescale:
|
|
|
|
|
ms_info['timescale'] = int(timescale)
|
|
|
|
|
segment_duration = segment_template.get('duration')
|
|
|
|
|
if segment_duration:
|
|
|
|
|
ms_info['segment_duration'] = int(segment_duration)
|
|
|
|
|
extract_common(segment_template)
|
|
|
|
|
media_template = segment_template.get('media')
|
|
|
|
|
if media_template:
|
|
|
|
|
ms_info['media_template'] = media_template
|
|
|
|
@ -1594,11 +1604,14 @@ class InfoExtractor(object):
|
|
|
|
|
if initialization:
|
|
|
|
|
ms_info['initialization_url'] = initialization
|
|
|
|
|
else:
|
|
|
|
|
initialization = segment_template.find(_add_ns('Initialization'))
|
|
|
|
|
if initialization is not None:
|
|
|
|
|
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
|
|
|
extract_Initialization(segment_template)
|
|
|
|
|
return ms_info
|
|
|
|
|
|
|
|
|
|
def combine_url(base_url, target_url):
    """Return target_url resolved against base_url.

    If target_url already starts with an http:// or https:// scheme it is
    returned unchanged. Otherwise it is appended to base_url, with a
    single '/' inserted when base_url does not already end with one.

    The scheme test ignores case because URI schemes are
    case-insensitive, so 'HTTP://host/x' is treated as absolute too.
    """
    if re.match(r'^https?://', target_url, re.IGNORECASE):
        return target_url
    separator = '' if base_url.endswith('/') else '/'
    return '%s%s%s' % (base_url, separator, target_url)
|
|
|
|
|
|
|
|
|
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
|
|
|
|
formats = []
|
|
|
|
|
for period in mpd_doc.findall(_add_ns('Period')):
|
|
|
|
@ -1655,9 +1668,7 @@ class InfoExtractor(object):
|
|
|
|
|
}
|
|
|
|
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
|
|
|
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
|
|
|
|
if 'total_number' not in representation_ms_info and 'segment_duration':
|
|
|
|
|
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
|
|
|
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
|
|
|
|
|
|
|
|
|
media_template = representation_ms_info['media_template']
|
|
|
|
|
media_template = media_template.replace('$RepresentationID$', representation_id)
|
|
|
|
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
|
|
|
@ -1666,7 +1677,11 @@ class InfoExtractor(object):
|
|
|
|
|
|
|
|
|
|
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
|
|
|
|
# can't be used at the same time
|
|
|
|
|
if '%(Number' in media_template:
|
|
|
|
|
if '%(Number' in media_template and 's' not in representation_ms_info:
|
|
|
|
|
segment_duration = None
|
|
|
|
|
if 'total_number' not in representation_ms_info and 'segment_duration':
|
|
|
|
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
|
|
|
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
|
|
|
|
representation_ms_info['segment_urls'] = [
|
|
|
|
|
media_template % {
|
|
|
|
|
'Number': segment_number,
|
|
|
|
@ -1675,28 +1690,65 @@ class InfoExtractor(object):
|
|
|
|
|
for segment_number in range(
|
|
|
|
|
representation_ms_info['start_number'],
|
|
|
|
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
|
|
|
|
representation_ms_info['fragments'] = [{
|
|
|
|
|
'url': media_template % {
|
|
|
|
|
'Number': segment_number,
|
|
|
|
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
|
|
|
|
},
|
|
|
|
|
'duration': segment_duration,
|
|
|
|
|
} for segment_number in range(
|
|
|
|
|
representation_ms_info['start_number'],
|
|
|
|
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
|
|
|
|
else:
|
|
|
|
|
# $Number*$ or $Time$ in media template with S list available
|
|
|
|
|
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
|
|
|
|
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
|
|
|
|
representation_ms_info['segment_urls'] = []
|
|
|
|
|
representation_ms_info['fragments'] = []
|
|
|
|
|
segment_time = 0
|
|
|
|
|
segment_d = None
|
|
|
|
|
segment_number = representation_ms_info['start_number']
|
|
|
|
|
|
|
|
|
|
def add_segment_url():
    """Record the current segment in representation_ms_info.

    Reads media_template, segment_time, segment_number, segment_d,
    representation_attrib and representation_ms_info from the enclosing
    scope. The template is formatted exactly once, with 'Time',
    'Bandwidth' and 'Number' all supplied, so it works for both $Time$
    and $Number$ based templates. The resulting URL is appended once to
    'segment_urls' and once, together with a duration, to 'fragments'.
    """
    segment_url = media_template % {
        'Time': segment_time,
        'Bandwidth': representation_attrib.get('bandwidth'),
        'Number': segment_number,
    }
    representation_ms_info['segment_urls'].append(segment_url)
    representation_ms_info['fragments'].append({
        'url': segment_url,
        # NOTE(review): presumably float_or_none scales segment_d by the
        # timescale to yield seconds - confirm against its definition
        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
    })
|
|
|
|
|
|
|
|
|
|
for num, s in enumerate(representation_ms_info['s']):
|
|
|
|
|
segment_time = s.get('t') or segment_time
|
|
|
|
|
segment_d = s['d']
|
|
|
|
|
add_segment_url()
|
|
|
|
|
segment_number += 1
|
|
|
|
|
for r in range(s.get('r', 0)):
|
|
|
|
|
segment_time += s['d']
|
|
|
|
|
segment_time += segment_d
|
|
|
|
|
add_segment_url()
|
|
|
|
|
segment_time += s['d']
|
|
|
|
|
segment_number += 1
|
|
|
|
|
segment_time += segment_d
|
|
|
|
|
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
|
|
|
|
# No media template
|
|
|
|
|
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
|
|
|
|
# or any YouTube dashsegments video
|
|
|
|
|
fragments = []
|
|
|
|
|
s_num = 0
|
|
|
|
|
for segment_url in representation_ms_info['segment_urls']:
|
|
|
|
|
s = representation_ms_info['s'][s_num]
|
|
|
|
|
for r in range(s.get('r', 0) + 1):
|
|
|
|
|
fragments.append({
|
|
|
|
|
'url': segment_url,
|
|
|
|
|
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
|
|
|
|
})
|
|
|
|
|
representation_ms_info['fragments'] = fragments
|
|
|
|
|
if 'segment_urls' in representation_ms_info:
|
|
|
|
|
f.update({
|
|
|
|
|
'segment_urls': representation_ms_info['segment_urls'],
|
|
|
|
|
'fragments': [],
|
|
|
|
|
'protocol': 'http_dash_segments',
|
|
|
|
|
})
|
|
|
|
|
if 'initialization_url' in representation_ms_info:
|
|
|
|
@ -1706,6 +1758,10 @@ class InfoExtractor(object):
|
|
|
|
|
})
|
|
|
|
|
if not f.get('url'):
|
|
|
|
|
f['url'] = initialization_url
|
|
|
|
|
f['fragments'].append({'url': initialization_url})
|
|
|
|
|
f['fragments'].extend(representation_ms_info['fragments'])
|
|
|
|
|
for fragment in f['fragments']:
|
|
|
|
|
fragment['url'] = combine_url(base_url, fragment['url'])
|
|
|
|
|
try:
|
|
|
|
|
existing_format = next(
|
|
|
|
|
fo for fo in formats
|
|
|
|
|