Hacky workaround for segments that can't be played independently

mike/bad-segment-hack
Mike Lang 1 year ago
parent 590e056f7a
commit aa89c358da

@ -33,7 +33,7 @@ def unpadded_b64_decode(s):
class SegmentInfo( class SegmentInfo(
namedtuple('SegmentInfoBase', [ namedtuple('SegmentInfoBase', [
'path', 'channel', 'quality', 'start', 'duration', 'type', 'hash' 'paths', 'channel', 'quality', 'start', 'duration', 'type', 'hash'
]) ])
): ):
"""Info parsed from a segment path, including original path. """Info parsed from a segment path, including original path.
@ -45,6 +45,9 @@ class SegmentInfo(
def is_partial(self): def is_partial(self):
"""Note that suspect is considered partial""" """Note that suspect is considered partial"""
return self.type != "full" return self.type != "full"
@property
def path(self):
return self.paths[0]
def parse_segment_timestamp(hour_str, min_str): def parse_segment_timestamp(hour_str, min_str):
@ -84,7 +87,7 @@ def parse_segment_path(path):
hash = None if type == 'temp' else unpadded_b64_decode(hash) hash = None if type == 'temp' else unpadded_b64_decode(hash)
start = None if hour is None else parse_segment_timestamp(hour, time) start = None if hour is None else parse_segment_timestamp(hour, time)
return SegmentInfo( return SegmentInfo(
path = path, paths = (path,),
channel = channel, channel = channel,
quality = quality, quality = quality,
start = start, start = start,
@ -101,12 +104,26 @@ class ContainsHoles(Exception):
"""Raised by get_best_segments() when a hole is found and allow_holes is False""" """Raised by get_best_segments() when a hole is found and allow_holes is False"""
def merge_segments(a, b):
"""Extremely hacky thing that merges two SegmentInfos into one "segment" with two paths.
Assumes the segments have the same channel, quality, and no gap between them.
"""
TYPES = ["full", "suspect", "partial"]
return a._replace(
paths = a.paths + b.paths,
duration = a.duration + b.duration,
hash = None,
# Use the worst of the two types
type = TYPES[max(TYPES.index(a.type), TYPES.index(b.type))],
)
@timed( @timed(
hours_path=lambda ret, hours_path, *args, **kwargs: hours_path, hours_path=lambda ret, hours_path, *args, **kwargs: hours_path,
has_holes=lambda ret, *args, **kwargs: None in ret, has_holes=lambda ret, *args, **kwargs: None in ret,
normalize=lambda ret, *args, **kwargs: len([x for x in ret if x is not None]), normalize=lambda ret, *args, **kwargs: len([x for x in ret if x is not None]),
) )
def get_best_segments(hours_path, start, end, allow_holes=True): def get_best_segments(hours_path, start, end, allow_holes=True, no_hack=False):
"""Return a list of the best sequence of non-overlapping segments """Return a list of the best sequence of non-overlapping segments
we have for a given time range. Hours path should be the directory containing hour directories. we have for a given time range. Hours path should be the directory containing hour directories.
Time args start and end should be given as datetime objects. Time args start and end should be given as datetime objects.
@ -145,6 +162,7 @@ def get_best_segments(hours_path, start, end, allow_holes=True):
# to these types is exact, and all operations on them are exact, so all operations are exact. # to these types is exact, and all operations on them are exact, so all operations are exact.
result = [] result = []
prev_segment = None
for hour in hour_paths_for_range(hours_path, start, end): for hour in hour_paths_for_range(hours_path, start, end):
# Especially when processing multiple hours, this routine can take a signifigant amount # Especially when processing multiple hours, this routine can take a signifigant amount
@ -154,6 +172,21 @@ def get_best_segments(hours_path, start, end, allow_holes=True):
# best_segments_by_start will give us the best available segment for each unique start time # best_segments_by_start will give us the best available segment for each unique start time
for segment in best_segments_by_start(hour): for segment in best_segments_by_start(hour):
# === HACK ===
# We see segments in pairs of duration 5.2 and 3.133.
# The ones with duration 3.133 are not playable without the previous 5.2 segment.
# So if our segment is a 3.133, we instead combine it with the previous one
# and we treat them as an indivisible unit.
# ============
if (3.1 < segment.duration.total_seconds() < 3.2) and not no_hack:
if prev_segment is not None and prev_segment.end == segment.start:
segment = merge_segments(prev_segment, segment)
# If the segment we merged into was already in result, roll it back
# so we can re-run the logic with the new segment instead.
if result and result[-1] is prev_segment:
result.pop()
prev_segment = segment
# special case: first segment # special case: first segment
if not result: if not result:
@ -350,7 +383,7 @@ def ffmpeg_cut_segment(segment, cut_start=None, cut_end=None):
args = [ args = [
'ffmpeg', 'ffmpeg',
'-hide_banner', '-loglevel', 'error', # suppress noisy output '-hide_banner', '-loglevel', 'error', # suppress noisy output
'-i', segment.path, '-i', "-",
] ]
# output from ffprobe is generally already sorted but let's be paranoid, # output from ffprobe is generally already sorted but let's be paranoid,
# because the order of map args matters. # because the order of map args matters.
@ -374,7 +407,9 @@ def ffmpeg_cut_segment(segment, cut_start=None, cut_end=None):
args += ['-f', 'mpegts', '-'] args += ['-f', 'mpegts', '-']
# run it # run it
logging.info("Running segment cut with args: {}".format(" ".join(args))) logging.info("Running segment cut with args: {}".format(" ".join(args)))
return subprocess.Popen(args, stdout=subprocess.PIPE) proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
gevent.spawn(feed_input, [segment], proc.stdin)
return proc
def ffmpeg_cut_stdin(output_file, cut_start, duration, encode_args): def ffmpeg_cut_stdin(output_file, cut_start, duration, encode_args):
@ -439,9 +474,10 @@ def rough_cut_segments(segment_ranges, ranges):
for segment in segments: for segment in segments:
if segment is None: if segment is None:
continue continue
with open(segment.path, 'rb') as f: for path in segment.paths:
for chunk in read_chunks(f): with open(path, 'rb') as f:
yield chunk for chunk in read_chunks(f):
yield chunk
@timed('cut', cut_type='fast', normalize=lambda ret, sr, ranges: range_total(ranges)) @timed('cut', cut_type='fast', normalize=lambda ret, sr, ranges: range_total(ranges))
@ -569,9 +605,10 @@ def fast_cut_range(segments, start, end, fixts=None):
fixts.next() fixts.next()
else: else:
# no cutting needed, just serve the file # no cutting needed, just serve the file
with open(segment.path, 'rb') as f: for path in segment.paths:
for chunk in read_chunks(f): with open(path, 'rb') as f:
yield fixts.feed(chunk) if fixts else chunk for chunk in read_chunks(f):
yield fixts.feed(chunk) if fixts else chunk
if fixts: if fixts:
# check for errors and indicate range is finished # check for errors and indicate range is finished
fixts.next() fixts.next()
@ -581,13 +618,14 @@ def feed_input(segments, pipe):
"""Write each segment's data into the given pipe in order. """Write each segment's data into the given pipe in order.
This is used to provide input to ffmpeg in a full cut.""" This is used to provide input to ffmpeg in a full cut."""
for segment in segments: for segment in segments:
with open(segment.path, 'rb') as f: for path in segment.paths:
try: with open(path, 'rb') as f:
shutil.copyfileobj(f, pipe) try:
except OSError as e: shutil.copyfileobj(f, pipe)
# ignore EPIPE, as this just means the end cut meant we didn't need all it except OSError as e:
if e.errno != errno.EPIPE: # ignore EPIPE, as this just means the end cut meant we didn't need all it
raise if e.errno != errno.EPIPE:
raise
pipe.close() pipe.close()

@ -270,7 +270,7 @@ def generate_media_playlist(channel, quality):
# (not an error because someone might ask for a specific start, no end, but we ended up with # (not an error because someone might ask for a specific start, no end, but we ended up with
# end before start because that's the latest time we have) # end before start because that's the latest time we have)
if start < end: if start < end:
segments = get_best_segments(hours_path, start, end) segments = get_best_segments(hours_path, start, end, no_hack=True)
else: else:
# Note the None to indicate there was a "hole" at both start and end # Note the None to indicate there was a "hole" at both start and end
segments = [None] segments = [None]

Loading…
Cancel
Save