From 7179fcacecaeafa476e169463ca086ba1ee779f6 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 23 Jun 2019 21:30:12 -0700 Subject: [PATCH] Backfiller: ignore temp segments To make this work, we make type a proper segment field. We also tell get_best_segments to ignore temp segments, since they might go away before we can actually use them. --- backfiller/backfiller/main.py | 5 +++++ common/common/segments.py | 11 ++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index 701a34c..0e3d20b 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -330,6 +330,11 @@ class BackfillerWorker(object): except ValueError as e: self.logger.warning('File {} invaid: {}'.format(path, e)) continue + + # Ignore temp segments as they may go away by the time we fetch them. + if segment.type == "temp": + self.logger.debug('Skipping {} as it is a temp segment'.format(path)) + continue # to avoid getting in the downloader's way ignore segments # less than recent_cutoff old diff --git a/common/common/segments.py b/common/common/segments.py index ddb3d75..3fe5b15 100644 --- a/common/common/segments.py +++ b/common/common/segments.py @@ -28,7 +28,7 @@ def unpadded_b64_decode(s): class SegmentInfo( namedtuple('SegmentInfoBase', [ - 'path', 'stream', 'variant', 'start', 'duration', 'is_partial', 'hash' + 'path', 'stream', 'variant', 'start', 'duration', 'type', 'hash' ]) ): """Info parsed from a segment path, including original path. @@ -36,6 +36,9 @@ class SegmentInfo( @property def end(self): return self.start + self.duration + @property + def is_partial(self): + return self.type != "full" def parse_segment_path(path): @@ -64,7 +67,7 @@ def parse_segment_path(path): variant = variant, start = datetime.datetime.strptime("{}:{}".format(hour, time), "%Y-%m-%dT%H:%M:%S.%f"), duration = datetime.timedelta(seconds=float(duration)), - is_partial = type != "full", + type = type, hash = hash, ) except ValueError as e: @@ -219,7 +222,9 @@ def best_segments_by_start(hour): # but is easy enough to do, so we might as well. parsed = (parse_segment_path(os.path.join(hour, name)) for name in segment_paths) for start_time, segments in itertools.groupby(parsed, key=lambda segment: segment.start): - segments = list(segments) + # ignore temp segments as they might go away by the time we want to use them + segments = [segment for segment in segments if segment.type != "temp"] + full_segments = [segment for segment in segments if not segment.is_partial] if full_segments: if len(full_segments) != 1: