From 2a1f7207a8c0d05a7b5a06c081c3ca99c9270624 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Tue, 21 Nov 2023 17:41:26 +1100 Subject: [PATCH] Allow a fudge factor when checking for gaps/overlaps between segments Sometimes in the wild (particularly on youtube) segments may not be timed perfectly, so allow up to 10ms of gap or overlap to be counted as "equal" for purposes of finding the best segment. --- common/common/segments.py | 10 ++++++++-- segment_coverage/segment_coverage/main.py | 10 +++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/common/common/segments.py b/common/common/segments.py index 2a6d35a..e62c1d7 100644 --- a/common/common/segments.py +++ b/common/common/segments.py @@ -151,6 +151,11 @@ def get_best_segments(hours_path, start, end, allow_holes=True): # python's datetime types represent these as integer microseconds internally. So the parsing # to these types is exact, and all operations on them are exact, so all operations are exact. + # ...however in the wild we sometimes see timestamps or durations that differ by a few ms. + # So we allow some fudge factors. + ALLOWABLE_OVERLAP = 0.01 # 10ms + ALLOWABLE_GAP = 0.01 # 10ms + result = [] for hour in hour_paths_for_range(hours_path, start, end): @@ -182,13 +187,14 @@ def get_best_segments(hours_path, start, end, allow_holes=True): else: # normal case: check against previous segment end time prev_end = result[-1].end - if segment.start < prev_end: + gap = (segment.start - prev_end).total_seconds() + if gap < -ALLOWABLE_OVERLAP: # Overlap! This shouldn't happen, though it might be possible due to weirdness # if the stream drops then starts again quickly. We simply ignore the overlapping # segment and let the algorithm continue. logging.info("Overlapping segments: {} overlaps end of {}".format(segment, result[-1])) continue - if result[-1].is_partial or prev_end < segment.start: + if result[-1].is_partial or gap > ALLOWABLE_GAP: # there's a gap between prev end and this start, so add a None if not allow_holes: raise ContainsHoles diff --git a/segment_coverage/segment_coverage/main.py b/segment_coverage/segment_coverage/main.py index b99589c..f8a46f7 100644 --- a/segment_coverage/segment_coverage/main.py +++ b/segment_coverage/segment_coverage/main.py @@ -254,6 +254,9 @@ class CoverageChecker(object): """Loop over available hours for each quality, checking segment coverage.""" self.logger.info('Starting') + ALLOWABLE_OVERLAP = 0.01 # 10ms + ALLOWABLE_GAP = 0.01 # 10ms + while not self.stopping.is_set(): for quality in self.qualities: @@ -374,7 +377,8 @@ class CoverageChecker(object): previous_editable = best_segment else: previous_end = previous.start + previous.duration - if segment.start < previous_end: + gap = (segment.start - previous_end).total_seconds() + if gap < -ALLOWABLE_OVERLAP: if segment.type == 'full': full_overlaps += 1 full_overlap_duration += previous_end - segment.start @@ -389,11 +393,11 @@ class CoverageChecker(object): coverage += segment.duration editable_coverage += segment.duration - if segment.start > previous_end: + if gap > ALLOWABLE_GAP: holes.append((previous_end, segment.start)) previous_editable_end = previous_editable.start + previous_editable.duration - if segment.start > previous_editable_end: + if (segment.start - previous_editable_end).total_seconds() > ALLOWABLE_GAP: editable_holes.append((previous_editable_end, segment.start)) previous_editable = best_segment