From 3f05eac5ea061d9ace4347abade6d4d8aae581aa Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 15 Jun 2019 14:20:55 -0700 Subject: [PATCH] Remove all usage of dateutil except when absolutely required dateutil attempts some fuzzy matching of datetimes, uses the system's local timezone by default (if timestamp doesn't have a timezone) and returns offset-aware datetime objects. We don't want any of these things in most circumstances - we would rather error out on a bad timestamp than make a guess as to its meaning, and we always want to deal strictly in UTC. The only exception to this is when parsing incoming HLS playlists. --- backfiller/backfiller/main.py | 5 ++--- backfiller/setup.py | 1 - common/common/__init__.py | 11 +++++++++++ restreamer/restreamer/main.py | 17 ++++++++--------- restreamer/setup.py | 1 - thrimshim/setup.py | 1 - 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index 991571a..f21485b 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -11,7 +11,6 @@ import urlparse import uuid import argh -import dateutil.parser import gevent.backdoor import prometheus_client as prom import requests @@ -365,7 +364,7 @@ class BackfillerWorker(object): @argh.arg('--metrics-port', help='Port for Prometheus stats. Default is 8002.') @argh.arg('--static-nodes', help='Nodes to always backfill from. Comma seperated if multiple. By default empty.') @argh.arg('--backdoor-port', help='Port for gevent.backdoor access. By default disabled.') -@argh.arg('--start', help='If a datetime only backfill hours after that datetime. If a number, bacfill hours more recent than that number of hours ago. If None (default), all hours are backfilled.') +@argh.arg('--start', help='If a datetime only backfill hours after that datetime. If a number, bacfill hours more recent than that number of hours ago. If None (default), all hours are backfilled. 
Datetime must be given in ISO format and as UTC.') @argh.arg('--run-once', help='If True, backfill only once. By default False.') @argh.arg('--node-file', help="Name of file listing nodes to backfill from. One node per line in the form NAME URI with whitespace only lines or lines starting with '#' ignored. If None (default) do not get nodes from a file.") @argh.arg('--node-database', help='Address of database node to fetch a list of nodes from. If None (default) do not get nodes from database.') @@ -385,7 +384,7 @@ def main(streams, base_dir='.', variants='source', metrics_port=8002, start = float(start) logging.info('Backfilling last {} hours'.format(start)) except ValueError: - start = dateutil.parser.parse(start) + start = common.parse_timestamp(start) logging.info('Backfilling since {}'.format(start)) common.PromLogCountsHandler.install() diff --git a/backfiller/setup.py b/backfiller/setup.py index d8fceba..1b4ee32 100644 --- a/backfiller/setup.py +++ b/backfiller/setup.py @@ -7,7 +7,6 @@ setup( install_requires = [ "argh", "gevent", - "python-dateutil", "requests", "wubloader-common", ], diff --git a/common/common/__init__.py b/common/common/__init__.py index 940ab13..2535b0b 100644 --- a/common/common/__init__.py +++ b/common/common/__init__.py @@ -11,6 +11,9 @@ from .segments import get_best_segments, cut_segments, parse_segment_path, Segme from .stats import timed, PromLogCountsHandler, install_stacksampler +HOUR_FMT = '%Y-%m-%dT%H' + + def dt_to_bustime(start, dt): """Convert a datetime to bus time. Bus time is seconds since the given start point.""" return (dt - start).total_seconds() @@ -21,6 +24,14 @@ def bustime_to_dt(start, bustime): return start + datetime.timedelta(seconds=bustime) +def parse_timestamp(timestamp): + """Common place to define how we parse our 'standard' timestamps we use throughout. + This is simply iso format, and can be generated by datetime.isoformat(). 
+ All timestamps are in UTC and the returned datetime is NOT offset-aware (in python + parlance, it's a "naive" datetime).""" + return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f") + + def format_bustime(bustime, round="millisecond"): """Convert bustime to a human-readable string (-)HH:MM:SS.fff, with the ending cut off depending on the value of round: diff --git a/restreamer/restreamer/main.py b/restreamer/restreamer/main.py index ff1e42f..5d73a86 100644 --- a/restreamer/restreamer/main.py +++ b/restreamer/restreamer/main.py @@ -7,14 +7,13 @@ import logging import os import signal -import dateutil.parser import gevent import gevent.backdoor import prometheus_client as prom from flask import Flask, url_for, request, abort, Response from gevent.pywsgi import WSGIServer -from common import get_best_segments, cut_segments, PromLogCountsHandler, install_stacksampler +from common import get_best_segments, cut_segments, PromLogCountsHandler, install_stacksampler, parse_timestamp import generate_hls from stats import stats, after_request @@ -149,7 +148,7 @@ def time_range_for_variant(stream, variant): abort(404) first, last = min(hours), max(hours) # note last hour parses to _start_ of that hour, so we add 1h to go to end of that hour - return dateutil.parser.parse(first), dateutil.parser.parse(last) + datetime.timedelta(hours=1) + return parse_timestamp(first), parse_timestamp(last) + datetime.timedelta(hours=1) @app.route('/playlist/.m3u8') @@ -161,8 +160,8 @@ def generate_master_playlist(stream): start, end: The time to begin and end the stream at. See generate_media_playlist for details. 
""" - start = dateutil.parser.parse(request.args['start']) if 'start' in request.args else None - end = dateutil.parser.parse(request.args['end']) if 'end' in request.args else None + start = parse_timestamp(request.args['start']) if 'start' in request.args else None + end = parse_timestamp(request.args['end']) if 'end' in request.args else None variants = listdir(os.path.join(app.static_folder, stream)) playlists = {} @@ -200,8 +199,8 @@ def generate_media_playlist(stream, variant): if not os.path.isdir(hours_path): abort(404) - start = dateutil.parser.parse(request.args['start']) if 'start' in request.args else None - end = dateutil.parser.parse(request.args['end']) if 'end' in request.args else None + start = parse_timestamp(request.args['start']) if 'start' in request.args else None + end = parse_timestamp(request.args['end']) if 'end' in request.args else None if start is None or end is None: # If start or end are not given, use the earliest/latest time available first, last = time_range_for_variant(stream, variant) @@ -235,8 +234,8 @@ def cut(stream, variant): Set to true by passing "true" (case insensitive). Even if holes are allowed, a 406 may result if the resulting video would be empty. """ - start = dateutil.parser.parse(request.args['start']) - end = dateutil.parser.parse(request.args['end']) + start = parse_timestamp(request.args['start']) + end = parse_timestamp(request.args['end']) if end <= start: return "End must be after start", 400 diff --git a/restreamer/setup.py b/restreamer/setup.py index 9770ac9..bd1be83 100644 --- a/restreamer/setup.py +++ b/restreamer/setup.py @@ -6,7 +6,6 @@ setup( packages = find_packages(), install_requires = [ "argh", - "python-dateutil", "flask", "gevent", "monotonic", diff --git a/thrimshim/setup.py b/thrimshim/setup.py index 7bd0fa1..947530a 100644 --- a/thrimshim/setup.py +++ b/thrimshim/setup.py @@ -10,7 +10,6 @@ setup( "gevent", "psycogreen", "psycopg2", - "python-dateutil", "wubloader-common", ], )