diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py
index 365f2ff..7ac3488 100644
--- a/backfiller/backfiller/main.py
+++ b/backfiller/backfiller/main.py
@@ -319,7 +319,7 @@ class BackfillerManager(object):
if failures < MAX_BACKOFF:
failures += 1
delay = common.jitter(TIMEOUT * 2**failures)
- self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay))
+ self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay), exc_info=True)
node_list_errors.inc()
self.stopping.wait(delay)
continue
diff --git a/segment_coverage/Dockerfile b/segment_coverage/Dockerfile
index 25f07d2..d92854f 100644
--- a/segment_coverage/Dockerfile
+++ b/segment_coverage/Dockerfile
@@ -13,7 +13,7 @@ RUN pip install /tmp/common && rm -r /tmp/common
# Install actual application
# freetype-dev and libpng-dev are required for matplotlib
-RUN apk add freetype-dev libpng-dev build-base libstdc++
+RUN apk add freetype-dev libpng-dev build-base libstdc++ postgresql-dev postgresql-libs
# need to install these manually
RUN ln -s /usr/include/locale.h /usr/include/xlocale.h \
&& pip install numpy \
diff --git a/segment_coverage/segment_coverage/main.py b/segment_coverage/segment_coverage/main.py
index 11ee9b3..d2eb676 100644
--- a/segment_coverage/segment_coverage/main.py
+++ b/segment_coverage/segment_coverage/main.py
@@ -15,6 +15,7 @@ import prometheus_client as prom
import common
from common import dateutil
+from common import database
segment_count_gauge = prom.Gauge(
@@ -73,7 +74,8 @@ class CoverageChecker(object):
CHECK_INTERVAL = 60 #seconds between checking coverage
- def __init__(self, channel, qualities, base_dir, first_hour, last_hour):
+ def __init__(self, channel, qualities, base_dir, first_hour, last_hour,
+ make_page, connection_string):
"""Constructor for CoverageChecker.
Creates a checker for a given channel with specified qualities."""
@@ -83,6 +85,8 @@ class CoverageChecker(object):
self.qualities = qualities
self.first_hour = first_hour
self.last_hour = last_hour
+ self.make_page = make_page
+ self.db_manager = None if connection_string is None else database.DBManager(dsn=connection_string)
self.stopping = gevent.event.Event()
self.logger = logging.getLogger('CoverageChecker({})'.format(channel))
@@ -176,6 +180,63 @@ class CoverageChecker(object):
os.rename(temp_path, final_path)
self.logger.info('Coverage map for {} created'.format(quality))
+    def create_coverage_page(self, quality):
+        """Write an HTML page for this channel and quality with one entry per node.
+
+        Node names and URLs are read from the ``nodes`` table (rows where
+        ``backfill_from`` is true). The page is first written to a uniquely
+        named temporary file and then renamed to
+        ``<base_dir>/coverage-maps/<channel>_<quality>_coverage.html``.
+
+        If no database is configured, or fetching the node list fails, the
+        problem is logged and no page is written; nothing is raised to the
+        caller for those cases.
+        """
+        if self.db_manager is None:
+            # db_manager is None when no connection string was supplied, so
+            # there is nothing to query; say so instead of logging a traceback.
+            self.logger.error('Getting nodes failed. No database connection configured.')
+            return
+
+        nodes = {}
+        try:
+            # NOTE(review): the connection is never handed back to db_manager
+            # here - confirm whether DBManager expects it to be returned.
+            connection = self.db_manager.get_conn()
+            # The host is only used in the log line below, so a DSN without a
+            # literal host=... pair (e.g. a postgresql:// URI) must not abort
+            # page creation.
+            host = next(
+                (part.split('=')[-1] for part in connection.dsn.split()
+                    if part.startswith('host=')),
+                'unknown host')
+            self.logger.info('Fetching list of nodes from {}'.format(host))
+            results = database.query(connection, """
+ SELECT name, url
+ FROM nodes
+ WHERE backfill_from""")
+            for row in results:
+                nodes[row.name] = row.url
+        except Exception:
+            # Exception rather than a bare except, so KeyboardInterrupt,
+            # SystemExit and gevent's GreenletExit still propagate.
+            # Logger.exception already attaches the traceback.
+            self.logger.exception('Getting nodes failed.')
+            return
+
+        self.logger.info('Nodes fetched: {}'.format(nodes.keys()))
+
+        html = """
+
+
+
+
+ {0} {1} Segment Coverage Maps
+
+
+
+ {0} {1}
+""".format(self.channel, quality)
+
+        # NOTE(review): five arguments are passed to a template that shows a
+        # single placeholder; str.format ignores unused positional arguments,
+        # but confirm the template text is intact.
+        for node in sorted(nodes):
+            html += """ {}
+
+""".format(node, nodes[node], self.channel, quality, node)
+
+        html += """
+"""
+
+        path_prefix = os.path.join(self.base_dir, 'coverage-maps', '{}_{}'.format(self.channel, quality))
+        # Write under a unique temporary name, then rename onto the final name.
+        temp_path = '{}_{}.html'.format(path_prefix, uuid.uuid4())
+        final_path = '{}_coverage.html'.format(path_prefix)
+        common.ensure_directory(temp_path)
+        with open(temp_path, 'w') as f:
+            f.write(html)
+        os.rename(temp_path, final_path)
+        self.logger.info('Coverage page for {} created'.format(quality))
+
def run(self):
"""Loop over available hours for each quality, checking segment coverage."""
@@ -422,6 +483,8 @@ class CoverageChecker(object):
self.logger.info('{}/{} is empty'.format(quality, hour))
self.create_coverage_map(quality, all_hour_holes, all_hour_partials)
+ if self.make_page:
+ self.create_coverage_page(quality)
self.stopping.wait(common.jitter(self.CHECK_INTERVAL))
@@ -431,10 +494,13 @@ class CoverageChecker(object):
@argh.arg('--qualities', help="Qualities of each channel to checked. Comma seperated if multiple. Default is 'source'.")
@argh.arg('--first-hour', help='First hour to compute coverage for. Default is earliest available hour.')
@argh.arg('--last-hour', help='Last hour to compute coverage for. Default is lastest available hour.')
+@argh.arg('--make-page', help='Make a html page displaying coverage maps for all nodes in database')
+@argh.arg('--connection-string', help='Postgres connection string, which is either a space-separated list of key=value pairs, or a URI like: postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--metrics-port', help='Port for Prometheus stats. Default is 8006.')
@argh.arg('--backdoor-port', help='Port for gevent.backdoor access. By default disabled.')
def main(channels, base_dir='.', qualities='source', first_hour=None,
- last_hour=None, metrics_port=8006, backdoor_port=0):
+ last_hour=None, make_page=False, connection_string=None,
+ metrics_port=8006, backdoor_port=0):
"""Segment coverage service"""
qualities = qualities.split(',') if qualities else []
@@ -452,7 +518,8 @@ def main(channels, base_dir='.', qualities='source', first_hour=None,
workers = []
for channel in channels:
logging.info('Starting coverage checks {} with {} as qualities in {}'.format(channel, ', '.join(qualities), base_dir))
- manager = CoverageChecker(channel, qualities, base_dir, first_hour, last_hour)
+ manager = CoverageChecker(channel, qualities, base_dir, first_hour,
+ last_hour, make_page, connection_string)
managers.append(manager)
workers.append(gevent.spawn(manager.run))
@@ -476,7 +543,7 @@ def main(channels, base_dir='.', qualities='source', first_hour=None,
# 2. Wait (with timeout) until they've stopped
gevent.wait(workers)
# 3. Check if any of them failed. If they did, report it. If mulitple
- # failed, we report one arbitrarily.
+ # failed, we report one arbitrarily.
for worker in workers:
worker.get()
diff --git a/segment_coverage/setup.py b/segment_coverage/setup.py
index 764a9bb..8f321b6 100644
--- a/segment_coverage/setup.py
+++ b/segment_coverage/setup.py
@@ -9,6 +9,8 @@ setup(
'gevent',
'matplotlib',
'numpy',
+ 'psycogreen',
+ 'psycopg2',
'prometheus-client',
'python-dateutil',
'wubloader-common',