From f20a77975629b78675fdfea40b247d7449d0bb90 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Tue, 1 Jan 2019 14:27:08 -0800 Subject: [PATCH] Fix some bugs and linter errors introduced by backfiller I ran `pyflakes` on the repo and found these bugs: ``` ./common/common.py:289: undefined name 'random' ./downloader/downloader/main.py:7: 'random' imported but unused ./backfiller/backfiller/main.py:150: undefined name 'variant' ./backfiller/backfiller/main.py:158: undefined name 'timedelta' ./backfiller/backfiller/main.py:171: undefined name 'sort' ./backfiller/backfiller/main.py:173: undefined name 'sort' ``` (ok, the "imported but unused" one isn't a bug, but the rest are) This fixes those, as well as a further issue I saw with sorting of hours. Iterables are not sortable. As an obvious example, what if your iterable was infinite? As a result, any attempt to sort an iterable that is not already a friendly type like a list or tuple will result in an error. We avoid this by coercing to list, fully realising the iterable and putting it into a form that python will let us sort. It also avoids the nasty side-effect of mutating the list that gets passed into us, which the caller may not expect. Consider this example: ``` >>> my_hours = ["one", "two", "three"] >>> print my_hours ["one", "two", "three"] >>> backfill_node(base_dir, node, stream, variants, hours=my_hours, order='forward') >>> print my_hours ["one", "three", "two"] ``` Also, one of the linter errors was non-trivial to fix - we were trying to get a list of hours (which is an api call for a particular variant), but at a time when we weren't dealing with a single variant. My solution was to get a list of hours for ALL variants, and take the union. --- backfiller/backfiller/main.py | 14 +++++++++----- common/common.py | 1 + downloader/downloader/main.py | 1 - 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index dadbef5..f4ecc85 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -147,15 +147,19 @@ def backfill_node(base_dir, node, stream, variants, hours=None, start=None, seconds to prioritise letting the downloader grab these segments.""" if hours is None: - hours = list_remote_hours(node, stream, variant) + # gather all available hours from all variants and take the union + hours = set().union(*[ + list_remote_hours(node, stream, variant) + for variant in variants + ]) elif is_iterable(hours): - pass # hours already in desired format + hours = list(hours) # coerce to list so it can be sorted else: n_hours = hours if n_hours < 1: raise ValueError('Number of hours has to be 1 or greater') now = datetime.datetime.utcnow() - hours = [(now - i * timedelta(hours=1)).strftime(HOUR_FMT) for i in range(n_hours)] + hours = [(now - i * datetime.timedelta(hours=1)).strftime(HOUR_FMT) for i in range(n_hours)] if start is not None: hours = [hour for hour in hours if hour >= start] @@ -168,9 +172,9 @@ def backfill_node(base_dir, node, stream, variants, hours=None, start=None, if order == 'random': random.shuffle(hours) elif order == 'forward': - sort(hours) + hours.sort() elif order == 'reverse': - sort(hours, reverse=True) + hours.sort(reverse=True) for variant in variants: diff --git a/common/common.py b/common/common.py index acb6b08..ba8f48d 100644 --- a/common/common.py +++ b/common/common.py @@ -8,6 +8,7 @@ import errno import itertools import logging import os +import random import sys from collections import namedtuple diff --git a/downloader/downloader/main.py b/downloader/downloader/main.py index d06cc50..0a068a7 100644 --- a/downloader/downloader/main.py +++ b/downloader/downloader/main.py @@ -4,7 +4,6 @@ import errno import hashlib import logging import os -import random import signal import sys import uuid