backfiller: Don't crash on DB errors

We move all connection handling into get_nodes().
This means that problems connecting no longer cause further errors
that crash the application completely.

In turn, this means that if the database goes down, the behaviour becomes
"continue backfilling from the nodes we know about" instead of crashing.
pull/119/head
Mike Lang 5 years ago
parent fc791e03d4
commit 0e437566aa

@ -189,9 +189,8 @@ class BackfillerManager(object):
self.start = start
self.run_once = run_once
self.node_file = node_file
self.node_database = node_database
if self.node_database is not None:
self.db_manager = database.DBManager(dsn=self.node_database)
self.db_manager = None if node_database is None else database.DBManager(dsn=node_database)
self.connection = None
self.localhost = localhost
self.download_concurrency = download_concurrency
self.recent_cutoff = recent_cutoff
@ -228,8 +227,6 @@ class BackfillerManager(object):
get_nodes are stopped. If self.run_once, only call nodes once. Calling
stop will exit the loop."""
self.logger.info('Starting')
if self.node_database is not None:
self.connection = self.db_manager.get_conn()
failures = 0
while not self.stopping.is_set():
@ -238,17 +235,12 @@ class BackfillerManager(object):
except Exception:
# To ensure a fresh slate and clear any DB-related errors, get a new conn on error.
# This is heavy-handed but simple and effective.
if self.node_database is not None:
self.connection = self.db_manager.get_conn()
self.connection = None
if failures < MAX_BACKOFF:
failures += 1
delay = common.jitter(TIMEOUT * 2**failures)
self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay))
try:
host = [s.split('=')[-1] for s in self.connection.dsn.split() if 'host' in s][0]
except Exception:
host = ''
node_list_errors.labels(filename=self.node_file, database=host).inc()
node_list_errors.labels(filename=self.node_file).inc()
self.stopping.wait(delay)
continue
exisiting_nodes = set(self.workers.keys())
@ -299,7 +291,9 @@ class BackfillerManager(object):
else:
nodes[substrs[0]] = substrs[1]
if self.node_database is not None:
if self.db_manager is not None:
if self.connection is None:
self.connection = self.db_manager.get_conn()
host = [s.split('=')[-1] for s in self.connection.dsn.split() if 'host' in s][0]
self.logger.info('Fetching list of nodes from {}'.format(host))
results = database.query(self.connection, """

Loading…
Cancel
Save