From bb05e37ae4bbc429ccd69ce5692f2f5a18a339de Mon Sep 17 00:00:00 2001
From: Mike Lang
Date: Sun, 10 Nov 2019 22:34:31 -0800
Subject: [PATCH] segments: Use longest segment in bytes if duration is the same

We occasionally see corrupted segments that are slightly shorter in size
but report the same metadata as the full segments. Prefer the largest
version as it's likely the least corrupt.
---
 common/common/segments.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/common/common/segments.py b/common/common/segments.py
index fcd04fe..2b7e0df 100644
--- a/common/common/segments.py
+++ b/common/common/segments.py
@@ -243,11 +243,12 @@ def best_segments_by_start(hour):
 					start_time, ", ".join(map(str, segments))
 				))
 				# We've observed some cases where the same segment (with the same hash) will be reported
-				# with different durations (generally at stream end). Prefer the longer duration,
+				# with different durations (generally at stream end). Prefer the longer duration (followed by longest size),
 				# as this will ensure that if hashes are different we get the most data, and if they
 				# are the same it should keep holes to a minimum.
-				# If same duration, we have to pick one, so pick highest-sorting hash just so we're consistent.
-				full_segments = [max(full_segments, key=lambda segment: (segment.duration, segment.hash))]
+				# If same duration and size, we have to pick one, so pick highest-sorting hash just so we're consistent.
+				sizes = {segment: os.stat(segment.path).st_size for segment in segments}
+				full_segments = [max(full_segments, key=lambda segment: (segment.duration, sizes[segment], segment.hash))]
 			yield full_segments[0]
 			continue
 		# no full segments, fall back to measuring partials.
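
A minimal standalone sketch of the selection rule this patch arrives at, for reference. This is not code from the repo: Segment and pick_best are hypothetical names, and the sizes dict here is hard-coded where the real code looks up os.stat(segment.path).st_size.

	# Tie-break order: longest duration, then largest file on disk, then
	# highest-sorting hash so the choice is deterministic.
	from collections import namedtuple

	Segment = namedtuple("Segment", ["path", "duration", "hash"])

	def pick_best(full_segments, sizes):
		"""Pick one segment out of several reported for the same start time."""
		return max(full_segments, key=lambda s: (s.duration, sizes[s], s.hash))

	# Two segments report the same 2.0s duration, but one file is truncated on disk.
	segments = [
		Segment(path="/tmp/a.ts", duration=2.0, hash="abc"),
		Segment(path="/tmp/b.ts", duration=2.0, hash="abd"),
	]
	sizes = {segments[0]: 100000, segments[1]: 64000}
	# Size breaks the tie before hash does, so the larger (less corrupt) file wins.
	assert pick_best(segments, sizes) is segments[0]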