From f8d10dacdf23c7b2653e8122c5d92839e30e41ff Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 15 Jun 2019 15:17:38 -0700 Subject: [PATCH] Audit and fix all usage of dateutil We wrap direct dateutil calls to handle two distinct cases: * `common.dateutil.parse()`: We want to handle arbitrary timestamps including tz info, then convert them to UTC. This is used in HLS parsing, and for command line input for backfiller * `common.dateutil.parse_utc_only()`: We want to only handle UTC timestamps, but datetime.strptime isn't flexible enough (eg. can't handle missing fractional component). This is used for restreamer request params. --- backfiller/backfiller/main.py | 4 ++-- common/common/dateutil.py | 23 +++++++++++++++++++++++ downloader/downloader/main.py | 4 ++-- restreamer/restreamer/main.py | 22 ++++++++++++---------- 4 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 common/common/dateutil.py diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index 991571a..bbac4a2 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -11,12 +11,12 @@ import urlparse import uuid import argh -import dateutil.parser import gevent.backdoor import prometheus_client as prom import requests import common +import common.dateutil segments_backfilled = prom.Counter( @@ -385,7 +385,7 @@ def main(streams, base_dir='.', variants='source', metrics_port=8002, start = float(start) logging.info('Backfilling last {} hours'.format(start)) except ValueError: - start = dateutil.parser.parse(start) + start = common.dateutil.parse(start) logging.info('Backfilling since {}'.format(start)) common.PromLogCountsHandler.install() diff --git a/common/common/dateutil.py b/common/common/dateutil.py new file mode 100644 index 0000000..7793e6e --- /dev/null +++ b/common/common/dateutil.py @@ -0,0 +1,23 @@ + + +"""Wrapper code around dateutil to use it more sanely""" + + +# required so we are able to import dateutil despite this module also 
being called dateutil +from __future__ import absolute_import + +import dateutil.parser +import dateutil.tz + + +def parse(timestamp): + """Parse given timestamp and return a naive UTC datetime. Timestamps carrying timezone info are converted to UTC; timestamps without timezone info are returned unchanged (ie. assumed to already be UTC).""" + dt = dateutil.parser.parse(timestamp) + if dt.tzinfo is not None: + dt = dt.astimezone(dateutil.tz.tzutc()).replace(tzinfo=None) + return dt + + +def parse_utc_only(timestamp): + """Parse given timestamp as UTC and return a naive datetime. Any timezone info in the string is ignored (dropped, NOT converted), as the caller is required to pass UTC.""" + return dateutil.parser.parse(timestamp, ignoretz=True) diff --git a/downloader/downloader/main.py b/downloader/downloader/main.py index 5b513d6..c1524f4 100644 --- a/downloader/downloader/main.py +++ b/downloader/downloader/main.py @@ -11,7 +11,6 @@ from base64 import b64encode from contextlib import contextmanager import argh -import dateutil.parser import gevent import gevent.backdoor import gevent.event @@ -21,6 +20,7 @@ from monotonic import monotonic import twitch import common +import common.dateutil segments_downloaded = prom.Counter( @@ -344,7 +344,7 @@ class StreamWorker(object): self.manager.mark_working(self) if segment.date: - date = dateutil.parser.parse(segment.date) + date = common.dateutil.parse(segment.date) if segment.uri not in self.getters: if date is None: raise ValueError("Cannot determine date of segment") diff --git a/restreamer/restreamer/main.py b/restreamer/restreamer/main.py index ff1e42f..81375f8 100644 --- a/restreamer/restreamer/main.py +++ b/restreamer/restreamer/main.py @@ -7,13 +7,13 @@ import logging import os import signal -import dateutil.parser import gevent import gevent.backdoor import prometheus_client as prom from flask import Flask, url_for, request, abort, Response from gevent.pywsgi import WSGIServer +import common.dateutil from common import get_best_segments, cut_segments, PromLogCountsHandler, install_stacksampler import generate_hls @@ -149,7 +149,9 @@ def time_range_for_variant(stream, variant): abort(404) first, last = min(hours), 
max(hours) # note last hour parses to _start_ of that hour, so we add 1h to go to end of that hour - return dateutil.parser.parse(first), dateutil.parser.parse(last) + datetime.timedelta(hours=1) + def parse_hour(s): + return datetime.datetime.strptime(s, "%Y-%m-%dT%H") + return parse_hour(first), parse_hour(last) + datetime.timedelta(hours=1) @app.route('/playlist/.m3u8') @@ -161,8 +163,8 @@ def generate_master_playlist(stream): start, end: The time to begin and end the stream at. See generate_media_playlist for details. """ - start = dateutil.parser.parse(request.args['start']) if 'start' in request.args else None - end = dateutil.parser.parse(request.args['end']) if 'end' in request.args else None + start = common.dateutil.parse_utc_only(request.args['start']) if 'start' in request.args else None + end = common.dateutil.parse_utc_only(request.args['end']) if 'end' in request.args else None variants = listdir(os.path.join(app.static_folder, stream)) playlists = {} @@ -189,7 +191,7 @@ def generate_media_playlist(stream, variant): """Returns a HLS media playlist for the given stream and variant. Takes optional params: start, end: The time to begin and end the stream at. - Must be in ISO 8601 format (ie. yyyy-mm-ddTHH:MM:SS). + Must be in ISO 8601 format (ie. yyyy-mm-ddTHH:MM:SS) and UTC. If not given, effectively means "infinity", ie. no start means any time ago, no end means any time in the future. 
Note that because it returns segments _covering_ that range, the playlist @@ -200,8 +202,8 @@ def generate_media_playlist(stream, variant): if not os.path.isdir(hours_path): abort(404) - start = dateutil.parser.parse(request.args['start']) if 'start' in request.args else None - end = dateutil.parser.parse(request.args['end']) if 'end' in request.args else None + start = common.dateutil.parse_utc_only(request.args['start']) if 'start' in request.args else None + end = common.dateutil.parse_utc_only(request.args['end']) if 'end' in request.args else None if start is None or end is None: # If start or end are not given, use the earliest/latest time available first, last = time_range_for_variant(stream, variant) @@ -229,14 +231,14 @@ def cut(stream, variant): """Return a MPEGTS video file covering the exact timestamp range. Params: start, end: Required. The start and end times, down to the millisecond. - Must be in ISO 8601 format (ie. yyyy-mm-ddTHH:MM:SS). + Must be in ISO 8601 format (ie. yyyy-mm-ddTHH:MM:SS) and UTC. allow_holes: Optional, default false. If false, errors out with a 406 Not Acceptable if any holes are detected, rather than producing a video with missing parts. Set to true by passing "true" (case insensitive). Even if holes are allowed, a 406 may result if the resulting video would be empty. """ - start = dateutil.parser.parse(request.args['start']) - end = dateutil.parser.parse(request.args['end']) + start = common.dateutil.parse_utc_only(request.args['start']) + end = common.dateutil.parse_utc_only(request.args['end']) if end <= start: return "End must be after start", 400