diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index 365f2ff..7ac3488 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -319,7 +319,7 @@ class BackfillerManager(object): if failures < MAX_BACKOFF: failures += 1 delay = common.jitter(TIMEOUT * 2**failures) - self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay)) + self.logger.exception('Getting nodes failed. Retrying in {:.0f} s'.format(delay), exc_info=True) node_list_errors.inc() self.stopping.wait(delay) continue diff --git a/segment_coverage/Dockerfile b/segment_coverage/Dockerfile index 25f07d2..d92854f 100644 --- a/segment_coverage/Dockerfile +++ b/segment_coverage/Dockerfile @@ -13,7 +13,7 @@ RUN pip install /tmp/common && rm -r /tmp/common # Install actual application # freetype-dev and libpng-dev are required for matplotlib -RUN apk add freetype-dev libpng-dev build-base libstdc++ +RUN apk add freetype-dev libpng-dev build-base libstdc++ postgresql-dev postgresql-libs # need to install these manually RUN ln -s /usr/include/locale.h /usr/include/xlocale.h \ && pip install numpy \ diff --git a/segment_coverage/segment_coverage/main.py b/segment_coverage/segment_coverage/main.py index 11ee9b3..d2eb676 100644 --- a/segment_coverage/segment_coverage/main.py +++ b/segment_coverage/segment_coverage/main.py @@ -15,6 +15,7 @@ import prometheus_client as prom import common from common import dateutil +from common import database segment_count_gauge = prom.Gauge( @@ -73,7 +74,8 @@ class CoverageChecker(object): CHECK_INTERVAL = 60 #seconds between checking coverage - def __init__(self, channel, qualities, base_dir, first_hour, last_hour): + def __init__(self, channel, qualities, base_dir, first_hour, last_hour, + make_page, connection_string): """Constructor for CoverageChecker. Creates a checker for a given channel with specified qualities.""" @@ -83,6 +85,8 @@ class CoverageChecker(object): self.qualities = qualities self.first_hour = first_hour self.last_hour = last_hour + self.make_page = make_page + self.db_manager = None if connection_string is None else database.DBManager(dsn=connection_string) self.stopping = gevent.event.Event() self.logger = logging.getLogger('CoverageChecker({})'.format(channel)) @@ -176,6 +180,63 @@ class CoverageChecker(object): os.rename(temp_path, final_path) self.logger.info('Coverage map for {} created'.format(quality)) + def create_coverage_page(self, quality): + nodes = {} + try: + connection = self.db_manager.get_conn() + host = [s.split('=')[-1] for s in connection.dsn.split() if 'host' in s][0] + self.logger.info('Fetching list of nodes from {}'.format(host)) + results = database.query(connection, """ + SELECT name, url + FROM nodes + WHERE backfill_from""") + for row in results: + nodes[row.name] = row.url + except: + self.logger.exception('Getting nodes failed.', exc_info=True) + return + + self.logger.info('Nodes fetched: {}'.format(nodes.keys())) + + html = """ + + + + + {0} {1} Segment Coverage Maps + + + +

{0} {1}

""".format(self.channel, quality) + + for node in sorted(nodes.keys()): + html += """

{}

+ {} +""".format(node, nodes[node], self.channel, quality, node) + + html += """ +""" + + path_prefix = os.path.join(self.base_dir, 'coverage-maps', '{}_{}'.format(self.channel, quality)) + temp_path = '{}_{}.html'.format(path_prefix, uuid.uuid4()) + final_path = '{}_coverage.html'.format(path_prefix) + common.ensure_directory(temp_path) + with open(temp_path, 'w') as f: + f.write(html) + os.rename(temp_path, final_path) + self.logger.info('Coverage page for {} created'.format(quality)) + def run(self): """Loop over available hours for each quality, checking segment coverage.""" @@ -422,6 +483,8 @@ class CoverageChecker(object): self.logger.info('{}/{} is empty'.format(quality, hour)) self.create_coverage_map(quality, all_hour_holes, all_hour_partials) + if self.make_page: + self.create_coverage_page(quality) self.stopping.wait(common.jitter(self.CHECK_INTERVAL)) @@ -431,10 +494,13 @@ class CoverageChecker(object): @argh.arg('--qualities', help="Qualities of each channel to checked. Comma seperated if multiple. Default is 'source'.") @argh.arg('--first-hour', help='First hour to compute coverage for. Default is earliest available hour.') @argh.arg('--last-hour', help='Last hour to compute coverage for. Default is lastest available hour.') +@argh.arg('--make-page', help='Make a html page displaying coverage maps for all nodes in database') +@argh.arg('--connection-string', help='Postgres connection string, which is either a space-separated list of key=value pairs, or a URI like: postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE') @argh.arg('--metrics-port', help='Port for Prometheus stats. Default is 8006.') @argh.arg('--backdoor-port', help='Port for gevent.backdoor access. By default disabled.') def main(channels, base_dir='.', qualities='source', first_hour=None, - last_hour=None, metrics_port=8006, backdoor_port=0): + last_hour=None, make_page=False, connection_string=None, + metrics_port=8006, backdoor_port=0): """Segment coverage service""" qualities = qualities.split(',') if qualities else [] @@ -452,7 +518,8 @@ def main(channels, base_dir='.', qualities='source', first_hour=None, workers = [] for channel in channels: logging.info('Starting coverage checks {} with {} as qualities in {}'.format(channel, ', '.join(qualities), base_dir)) - manager = CoverageChecker(channel, qualities, base_dir, first_hour, last_hour) + manager = CoverageChecker(channel, qualities, base_dir, first_hour, + last_hour, make_page, connection_string) managers.append(manager) workers.append(gevent.spawn(manager.run)) @@ -476,7 +543,7 @@ def main(channels, base_dir='.', qualities='source', first_hour=None, # 2. Wait (with timeout) until they've stopped gevent.wait(workers) # 3. Check if any of them failed. If they did, report it. If mulitple - # failed, we report one arbitrarily. + # failed, we report one arbitrarily. for worker in workers: worker.get() diff --git a/segment_coverage/setup.py b/segment_coverage/setup.py index 764a9bb..8f321b6 100644 --- a/segment_coverage/setup.py +++ b/segment_coverage/setup.py @@ -9,6 +9,8 @@ setup( 'gevent', 'matplotlib', 'numpy', + 'psycogreen', + 'psycopg2', 'prometheus-client', 'python-dateutil', 'wubloader-common',