From 15c357509f315122543ae0b1399ac45338f6aa01 Mon Sep 17 00:00:00 2001
From: Mike Lang
Date: Thu, 12 Nov 2020 03:29:45 +1100
Subject: [PATCH] segment_coverage: Fix a problem where metrics would fail

Because the checking process is entirely CPU-bound, it does not give any
other greenlets a chance to run while it is processing. This prevents us
from responding to metrics queries, and Prometheus then times out.

By pausing to let all other traffic be handled in between each hour
processed, we ensure metrics remain responsive while processing.
---
 segment_coverage/segment_coverage/main.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/segment_coverage/segment_coverage/main.py b/segment_coverage/segment_coverage/main.py
index d0fb297..23d268f 100644
--- a/segment_coverage/segment_coverage/main.py
+++ b/segment_coverage/segment_coverage/main.py
@@ -261,6 +261,11 @@ class CoverageChecker(object):
         all_hour_holes = {}
         all_hour_partials = {}
         for hour in hours:
+            # Let other things run, to avoid starving them with CPU-heavy workload
+            # (in particular the metrics server can have issues responding in time
+            # otherwise).
+            gevent.idle()
+
             if self.stopping.is_set():
                 break
             self.logger.info('Checking {}/{}'.format(quality, hour))