From 3bbe1ed32d353c917f0db6cc9b91f473cf9de5d9 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Wed, 26 Dec 2018 20:07:07 -0800 Subject: [PATCH] Prefer longer duration on multiple segments --- common/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/common/common.py b/common/common.py index 83455ed..17f9b6b 100644 --- a/common/common.py +++ b/common/common.py @@ -240,8 +240,12 @@ def best_segments_by_start(hour): logging.warning("Multiple versions of full segment at start_time {}: {}".format( start_time, ", ".join(map(str, segments)) )) - # we have to pick one, so might as well make it consistent by sorting by path - full_segments.sort(key=lambda segment: segment.path) + # We've observed some cases where the same segment (with the same hash) will be reported + # with different durations (generally at stream end). Prefer the longer duration, + # as this will ensure that if hashes are different we get the most data, and if they + # are the same it should keep holes to a minimum. + # If same duration, we have to pick one, so pick highest-sorting hash just so we're consistent. + full_segments = [max(full_segments, key=lambda segment: (segment.duration, segment.hash))] yield full_segments[0] continue # no full segments, fall back to measuring partials.