|
|
|
|
|
|
|
"""A place for common utilities between wubloader components"""
|
|
|
|
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
import errno
|
|
|
|
import os
|
Fix some bugs and linter errors introduced by backfiller
I ran `pyflakes` on the repo and found these bugs:
```
./common/common.py:289: undefined name 'random'
./downloader/downloader/main.py:7: 'random' imported but unused
./backfiller/backfiller/main.py:150: undefined name 'variant'
./backfiller/backfiller/main.py:158: undefined name 'timedelta'
./backfiller/backfiller/main.py:171: undefined name 'sort'
./backfiller/backfiller/main.py:173: undefined name 'sort'
```
(ok, the "imported but unused" one isn't a bug, but the rest are)
This fixes those, as well as a further issue I saw with sorting of hours.
Iterables are not sortable. As an obvious example, what if your iterable was infinite?
As a result, any attempt to sort an iterable that is not already a friendly type like a list
or tuple will result in an error. We avoid this by coercing to list, fully realising the iterable
and putting it into a form that python will let us sort. It also avoids the nasty side-effect
of mutating the list that gets passed into us, which the caller may not expect. Consider this example:
```
>>> my_hours = ["one", "two", "three"]
>>> print my_hours
["one", "two", "three"]
>>> backfill_node(base_dir, node, stream, variants, hours=my_hours, order='forward')
>>> print my_hours
["one", "three", "two"]
```
Also, one of the linter errors was non-trivial to fix - we were trying to get a list of hours
(which is an api call for a particular variant), but at a time when we weren't dealing with a single
variant. My solution was to get a list of hours for ALL variants, and take the union.
6 years ago
|
|
|
import random
|
|
|
|
|
|
|
|
from .segments import get_best_segments, cut_segments, parse_segment_path, SegmentInfo
|
|
|
|
from .stats import timed, PromLogCountsHandler, install_stacksampler
|
|
|
|
|
|
|
|
|
|
|
|
def dt_to_bustime(start, dt):
|
|
|
|
"""Convert a datetime to bus time. Bus time is seconds since the given start point."""
|
|
|
|
return (dt - start).total_seconds()
|
|
|
|
|
|
|
|
|
|
|
|
def bustime_to_dt(start, bustime):
|
|
|
|
"""Convert from bus time to a datetime"""
|
|
|
|
return start + datetime.timedelta(seconds=bustime)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_bustime(bustime):
|
|
|
|
"""Convert from bus time human-readable string [-]HH:MM[:SS[.fff]]
|
|
|
|
to float seconds since bustime 00:00. Inverse of format_bustime(),
|
|
|
|
see it for detail."""
|
|
|
|
if bustime.startswith('-'):
|
|
|
|
# parse without the -, then negate it
|
|
|
|
return -parse_bustime(bustime[:1])
|
|
|
|
|
|
|
|
parts = bustime.strip().split(':')
|
|
|
|
if len(parts) == 2:
|
|
|
|
hours, mins = parts
|
|
|
|
secs = 0
|
|
|
|
elif len(parts) == 3:
|
|
|
|
hours, mins, secs = parts
|
|
|
|
else:
|
|
|
|
raise ValueError("Invalid bustime: must be HH:MM[:SS]")
|
|
|
|
hours = int(hours)
|
|
|
|
mins = int(mins)
|
|
|
|
secs = float(secs)
|
|
|
|
return 3600 * hours + 60 * mins + secs
|
|
|
|
|
|
|
|
|
|
|
|
def format_bustime(bustime, round="millisecond"):
|
|
|
|
"""Convert bustime to a human-readable string (-)HH:MM:SS.fff, with the
|
|
|
|
ending cut off depending on the value of round:
|
|
|
|
"millisecond": (default) Round to the nearest millisecond.
|
|
|
|
"second": Round down to the current second.
|
|
|
|
"minute": Round down to the current minute.
|
|
|
|
Examples:
|
|
|
|
00:00:00.000
|
|
|
|
01:23:00
|
|
|
|
110:50
|
|
|
|
159:59:59.999
|
|
|
|
-10:30:01.100
|
|
|
|
Negative times are formatted as time-until-start, preceeded by a minus
|
|
|
|
sign.
|
|
|
|
eg. "-1:20:00" indicates the run begins in 80 minutes.
|
|
|
|
"""
|
|
|
|
sign = ''
|
|
|
|
if bustime < 0:
|
|
|
|
sign = '-'
|
|
|
|
bustime = -bustime
|
|
|
|
total_mins, secs = divmod(bustime, 60)
|
|
|
|
hours, mins = divmod(total_mins, 60)
|
|
|
|
parts = [
|
|
|
|
"{:02d}".format(int(hours)),
|
|
|
|
"{:02d}".format(int(mins)),
|
|
|
|
]
|
|
|
|
if round == "minute":
|
|
|
|
pass
|
|
|
|
elif round == "second":
|
|
|
|
parts.append("{:02d}".format(int(secs)))
|
|
|
|
elif round == "millisecond":
|
|
|
|
parts.append("{:06.3f}".format(secs))
|
|
|
|
else:
|
|
|
|
raise ValueError("Bad rounding value: {!r}".format(round))
|
|
|
|
return sign + ":".join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
def rename(old, new):
|
|
|
|
"""Atomic rename that succeeds if the target already exists, since we're naming everything
|
|
|
|
by hash anyway, so if the filepath already exists the file itself is already there.
|
|
|
|
In this case, we delete the source file.
|
|
|
|
"""
|
|
|
|
try:
|
|
|
|
os.rename(old, new)
|
|
|
|
except OSError as e:
|
|
|
|
if e.errno != errno.EEXIST:
|
|
|
|
raise
|
|
|
|
os.remove(old)
|
|
|
|
|
|
|
|
|
|
|
|
def ensure_directory(path):
|
|
|
|
"""Create directory that contains path, as well as any parent directories,
|
|
|
|
if they don't already exist."""
|
|
|
|
dir_path = os.path.dirname(path)
|
|
|
|
if os.path.exists(dir_path):
|
|
|
|
return
|
|
|
|
ensure_directory(dir_path)
|
|
|
|
try:
|
|
|
|
os.mkdir(dir_path)
|
|
|
|
except OSError as e:
|
|
|
|
# Ignore if EEXISTS. This is needed to avoid a race if two getters run at once.
|
|
|
|
if e.errno != errno.EEXIST:
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
|
|
def jitter(interval):
|
|
|
|
"""Apply some 'jitter' to an interval. This is a random +/- 10% change in order to
|
|
|
|
smooth out patterns and prevent everything from retrying at the same time.
|
|
|
|
"""
|
|
|
|
return interval * (0.9 + 0.2 * random.random())
|
|
|
|
|
|
|
|
|
|
|
|
def encode_strings(o):
|
|
|
|
"""Recurvisely handles unicode in json output."""
|
|
|
|
if isinstance(o, list):
|
|
|
|
return [encode_strings(x) for x in o]
|
|
|
|
if isinstance(o, dict):
|
|
|
|
return {k.encode('utf-8'): encode_strings(v) for k, v in o.items()}
|
|
|
|
if isinstance(o, unicode):
|
|
|
|
return o.encode('utf-8')
|
|
|
|
return o
|