@@ -188,9 +188,8 @@ class BackfillerManager(object):
 		self.start = start
 		self.run_once = run_once
 		self.node_file = node_file
-		self.node_database = node_database
-		if self.node_database is not None:
-			self.db_manager = database.DBManager(dsn=self.node_database)
+		self.db_manager = None if node_database is None else database.DBManager(dsn=node_database)
+		self.connection = None
 		self.localhost = localhost
 		self.download_concurrency = download_concurrency
 		self.recent_cutoff = recent_cutoff
@@ -227,8 +226,6 @@ class BackfillerManager(object):
 		get_nodes are stopped. If self.run_once, only call nodes once. Calling
 		stop will exit the loop."""
 		self.logger.info('Starting')
-		if self.node_database is not None:
-			self.connection = self.db_manager.get_conn()
 		failures = 0
 
 		while not self.stopping.is_set():
@@ -237,17 +234,12 @@ class BackfillerManager(object):
 			except Exception:
 				# To ensure a fresh slate and clear any DB-related errors, get a new conn on error.
 				# This is heavy-handed but simple and effective.
-				if self.node_database is not None:
-					self.connection = self.db_manager.get_conn()
+				self.connection = None
 				if failures < MAX_BACKOFF:
 					failures += 1
 				delay = common.jitter(TIMEOUT * 2**failures)
 				self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay))
-				try:
-					host = [s.split('=')[-1] for s in self.connection.dsn.split() if 'host' in s][0]
-				except Exception:
-					host = ''
-				node_list_errors.labels(filename=self.node_file, database=host).inc()
+				node_list_errors.labels(filename=self.node_file).inc()
 				self.stopping.wait(delay)
 				continue
 			exisiting_nodes = set(self.workers.keys())
@@ -298,7 +290,9 @@ class BackfillerManager(object):
 			else:
 				nodes[substrs[0]] = substrs[1]
 
-		if self.node_database is not None:
+		if self.db_manager is not None:
+			if self.connection is None:
+				self.connection = self.db_manager.get_conn()
 			host = [s.split('=')[-1] for s in self.connection.dsn.split() if 'host' in s][0]
 			self.logger.info('Fetching list of nodes from {}'.format(host))
 			results = database.query(self.connection, """