From d6f976ac0f6d8d8af9731c9a63d0f0d8666bc2ac Mon Sep 17 00:00:00 2001 From: Christopher Usher Date: Mon, 4 Nov 2024 23:18:53 -0800 Subject: [PATCH] Bug fixes and improvements in response to comments on the PR --- common/common/shifts.py | 146 +++++++++++++++++++--------------- docker-compose.jsonnet | 12 +-- postgres/schema.sql | 2 +- sheetsync/setup.py | 2 +- sheetsync/sheetsync/main.py | 24 ++++-- sheetsync/sheetsync/sheets.py | 13 ++- thrimshim/thrimshim/main.py | 15 ++-- 7 files changed, 127 insertions(+), 87 deletions(-) diff --git a/common/common/shifts.py b/common/common/shifts.py index 606c8ca..8a7ba60 100644 --- a/common/common/shifts.py +++ b/common/common/shifts.py @@ -1,5 +1,7 @@ import datetime +import urllib +import zoneinfo from common import dateutil from common.requests import InstrumentedSession @@ -9,75 +11,93 @@ requests = InstrumentedSession() def parse_shift_time(time_str, timeout=5): - """Parse times in the shift definition. - - The parser first tries to parse a string as a datetime before trying the string as a URL to fetch a timestamp from.""" - if not time_str: - return None - try: - return dateutil.parse(time_str) - except ValueError: - try: - resp = requests.get(time_str, timeout=timeout, metric_name='get_shift_time') - resp.raise_for_status() - return dateutil.parse(resp.text.strip()) - except Exception: - return None + """ + Parse times in the shift definition. + + The parser first tries to parse a string as a URL to fetch a timestamp from before trying to parse it as a timestamp. + """ + if not time_str: + return None + if urllib.parse.urlparse(time_str).scheme in ('http', 'https'): + resp = requests.get(time_str, timeout=timeout) + resp.raise_for_status() + return dateutil.parse(resp.text.strip()) + else: + return dateutil.parse(time_str) def parse_shifts(shifts): - """Parse a shifts definition + """ + Parse a shifts definition. - The shifts definition is two entry mappable with two keys, repeating and one-off. + The shifts definition is three entry mappable with two keys, repeating, one_off and timezone. - The repeating shifts entry is a list of shift definition. Each of these is a sequence consisting of the name of the shift, the starting hour of the shift in local time, and the ending hour in local time. Repeating shifts extending across midnight can be handled by using two shifts with the same name. For example: - [['Night', 0, 6], - ['Day', 6, 18], - ['Night', 18, 24]] + The repeating shifts entry is a list of shift definition. + Each of these is a sequence consisting of the name of the shift, + the starting hour of the shift in local time, and the ending hour in local time. + Repeating shifts extending across midnight can be handled by using two shifts with the same name. + For example: + [['Night', 0, 6], + ['Day', 6, 18], + ['Night', 18, 24]] - The one-off shifts entry is a list of shift definitions. Each of these is a sequence consisting of the name of the shift, the start the shift, and the end of the shift. A start or end time can be a timestamp, a URL or None. If it is a URL, the URL will be queried for a timestamp. If no timezone info is provided the timestamp will be assumed to be UTC. If the start time is None, then the start will be assumed to be the earliest possible datetime; if the end is None, it will be assumed to be the oldest possible datetime. If both the start and end are None, the shift will be ignored. For example: - [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'], - ['End Only', '2024-01-02T00:00:00', None], - ['URL', 'http://example.com/start.html', '2024-01-01T00:00:00'], - ['Both None', None, None]] - would be parsed as: - [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'], - ['Start Only', '2024-01-02T00:00:00', '9999-12-31T23:59:59.999999'], - ['URL', '2023-12-31T12:00:00', '2024-01-01T00:00:00']] - """ - new_shifts = {'repeating':shifts['repeating'], 'one_off':[]} - for shift in shifts['one_off']: - name, start, end = shift - start = parse_shift_time(start) - end = parse_shift_time(end) - if (start is None) and (end is None): - continue - if start is None: - start = datetime.datetime.min - if end is None: - end = datetime.datetime.max - new_shifts['one_off'].append([name, start, end]) - return new_shifts + The one-off shifts entry is a list of shift definitions. + Each of these is a sequence consisting of the name of the shift, the start the shift, + and the end of the shift. + A start or end time can be a timestamp, a URL or None. + If it is a URL, the URL will be queried for a timestamp. + If no timezone info is provided the timestamp will be assumed to be UTC. + If the start time is None, then the start will be assumed to be the earliest possible datetime; + if the end is None, it will be assumed to be the oldest possible datetime. + For example: + [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'], + ['End Only', '2024-01-02T00:00:00', None], + ['URL', 'http://example.com/start.html', '2024-01-01T00:00:00'], + ['Both None', None, None]] + would be parsed as: + [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'], + ['End Only', '2024-01-02T00:00:00', '9999-12-31T23:59:59.999999'], + ['URL', '2023-12-31T12:00:00', '2024-01-01T00:00:00'], + ['Both None', '0001-01-01T00:00:00', '9999-12-31T23:59:59.999999']] + + The timezone entry is a string that the zoneinfo package can interpret as a timezone + + One-off shifts override repeating shifts. + In the case of overlapping shifts, the first shift in the list takes precedence. + """ + new_shifts = {'repeating':shifts['repeating'], 'one_off':[]} + for shift in shifts['one_off']: + name, start, end = shift + start = parse_shift_time(start) + end = parse_shift_time(end) + if start is None: + start = datetime.datetime.min + if end is None: + end = datetime.datetime.max + new_shifts['one_off'].append([name, start, end]) + new_shifts['timezone'] = zoneinfo.ZoneInfo(shifts['timezone']) + return new_shifts def calculate_shift(time, shifts, timezone): - """Calculate what shift a time falls in. - - time is a datetime, shifts the output from parse_shifts and timezone a - """ - if not time: - return '' - - for shift in shifts['one_off']: - print(time, shift[1], shift[2]) - if shift[1] <= time < shift[2]: - return shift[0] - - #since shifts are based on local times we have to worry about timezones for once - local_time = time.replace(tzinfo=UTC).astimezone(timezone) - # do a more involved calculation to allow for non-integer start and end hours - time_diff = local_time - datetime.datetime(local_time.year, local_time.month, local_time.day, tzinfo=timezone) - hour = time_diff / datetime.timedelta(hours=1) - for shift in shifts['repeating']: - if shift[1] <= hour < shift[2]: - return shift[0] + """ + Calculate what shift a time falls in. + + Arguments: + time -- a datetime.datetime instance + shifts -- the output from parse_shifts + """ + if time is not None: + return '' + + for shift in shifts['one_off']: + if shift[1] <= time < shift[2]: + return shift[0] + + #since shifts are based on local times we have to worry about timezones for once + local_time = time.replace(tzinfo=UTC).astimezone(timezone) + # do a more involved calculation to allow for non-integer start and end hours + hour = local_time.hour + local_time.minute / 60 + local_time.second / 3600 + for shift in shifts['repeating']: + if shift[1] <= hour < shift[2]: + return shift[0] diff --git a/docker-compose.jsonnet b/docker-compose.jsonnet index 550ec9c..65ab324 100644 --- a/docker-compose.jsonnet +++ b/docker-compose.jsonnet @@ -187,14 +187,15 @@ shift_defs:: { repeating: [ ["Zeta Shift", 0, 6], - ["Dawn Guard", 6, 12], + ["Dawn Guard", 6, 12], ["Alpha Flight", 12, 18], - ["Night Watch", 18, 24], + ["Night Watch", 18, 24], ], - one_off: [ + one_off: [ ["Tech Test", null, $.bustime_start], - ["Omega Shift", "http://example.com/omega_start.html", null], - ] + ["Omega Shift", "http://example.com/omega_start.html", null], + ], + timezone: $.timezone, }, shifts:: std.manifestJson($.shift_defs), @@ -559,7 +560,6 @@ image: $.get_image("sheetsync"), // Args for the sheetsync command: [ - "--timezone", $.timezone, "--shifts", $.shifts, "--backdoor-port", std.toString($.backdoor_port), $.db_connect, diff --git a/postgres/schema.sql b/postgres/schema.sql index 2d641d5..7e12b7d 100644 --- a/postgres/schema.sql +++ b/postgres/schema.sql @@ -157,7 +157,7 @@ CREATE TABLE playlists ( first_event_id TEXT, last_event_id TEXT, -- name of the thumbnail template to be applied by default to this tag - default_template TEXT NOT NULL DEFAULT '' + default_template TEXT ); -- This table records time series data gleaned from the bus cam (right now, just the odometer). diff --git a/sheetsync/setup.py b/sheetsync/setup.py index 726bc50..1528d6d 100644 --- a/sheetsync/setup.py +++ b/sheetsync/setup.py @@ -12,7 +12,7 @@ setup( "psycopg2", "python-dateutil", "requests", - "tzdata", + "tzdata", "wubloader-common", ], ) diff --git a/sheetsync/sheetsync/main.py b/sheetsync/sheetsync/main.py index 1f24280..eb06393 100644 --- a/sheetsync/sheetsync/main.py +++ b/sheetsync/sheetsync/main.py @@ -2,7 +2,6 @@ import json import logging import signal -import zoneinfo from collections import defaultdict from urllib.parse import urlparse @@ -277,11 +276,10 @@ class EventsSync(SheetSync): "category", } - def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, timezone=None, shifts=None): + def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, shifts=None): super().__init__(name, middleware, stop, dbmanager, reverse_sync) self.media_dir = media_dir self.media_downloads = None if media_dir is None else {} - self.timezone = timezone self.shifts = shifts @@ -417,14 +415,22 @@ class PlaylistsSync(SheetSync): event_id: The id of the streamlog event to sync """, ) -@argh.arg('--timezone', help="Local timezone for determining shift times") @argh.arg('--shifts', type=json.loads, help=""" Shift definitions in JSON form. Always present: - repeating: a list of repeating shifts. Each of these consist of a sequence of shift name, start hour and end hour. The start and end hours are in local time. - one_off: a list of non-repeating shifts. Each of these consist of a sequence of shift name, start and end. A start or end time can be a string repersenting timestamp or a URL or null. If it is a URL, the URL will be queried for a timestamp. If no timezone info is provided the timestamp will be assumed to be UTC. If the start time is None, then the start will be assumed to be the earliest possible datetime; if the end is None, it will be assumed to be the oldest possible datetime. If both the start and end are None, the shift will be ignored. + repeating: a list of repeating shifts. + Each of these consist of a sequence of shift name, start hour and end hour. + The start and end hours are in local time. + one_off: a list of non-repeating shifts. + Each of these consist of a sequence of shift name, start and end. + A start or end time can be a string repersenting timestamp or a URL or null. + If it is a URL, the URL will be queried for a timestamp. + If no timezone info is provided the timestamp will be assumed to be UTC. + If the start time is None, then the start will be assumed to be the earliest possible datetime; + if the end is None, it will be assumed to be the oldest possible datetime. + timezone: a string interpretable by the zoneinfo package as a timezone """) -def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None, timezone=None): +def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None): """ Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet to the DB and outputs from the DB to the sheet. @@ -444,6 +450,9 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir= logging.info("Starting up") + if shifts is None: + shifts = {'repeating':[], 'one_off':[], 'timezone':'UTC'} + dbmanager = DBManager(dsn=dbconnect) while True: try: @@ -520,7 +529,6 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir= }[config["type"]] sync_class_kwargs = {} if config["type"] == "events": - sync_class_kwargs["timezone"] = zoneinfo.ZoneInfo(timezone) sync_class_kwargs["shifts"] = shifts if config["type"] == "events" and config.get("download_media", False): sync_class_kwargs["media_dir"] = media_dir diff --git a/sheetsync/sheetsync/sheets.py b/sheetsync/sheetsync/sheets.py index 4a9a7b0..a0a8c6c 100644 --- a/sheetsync/sheetsync/sheets.py +++ b/sheetsync/sheetsync/sheets.py @@ -1,6 +1,7 @@ import logging import uuid +import zoneinfo from monotonic import monotonic @@ -273,6 +274,9 @@ class SheetsEventsMiddleware(SheetsMiddleware): self.bustime_start = bustime_start self.edit_url = edit_url + # fallback to no shifts if there is a shift parsing error + self.latest_shifts = {'repeating':[], 'one_off':[], 'timezone':zoneinfo.ZoneInfo('UTC')} + # column parsers are defined here so they can reference self self.column_parsers = { 'event_start': self.parse_bustime, @@ -294,7 +298,11 @@ class SheetsEventsMiddleware(SheetsMiddleware): def get_rows(self): # only need to update the shifts once per sync - self.latest_shifts = common.shifts.parse_shifts(self.shifts) + try: + self.latest_shifts = common.shifts.parse_shifts(self.shifts) + except Exception as e: + logging.error('Error parsing shifts with {}. Using previous shifts definition.'.format(e)) + return super().get_rows() def parse_bustime(self, value, preserve_dash=False): @@ -331,9 +339,10 @@ class SheetsEventsMiddleware(SheetsMiddleware): # This is only needed for full events (not the archive sheet), # so only do it if we had a tags column in the first place. if 'tags' in row_dict: + shift_tag = common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts) row_dict['tags'] = ( + ([shift_tag] if shift_tag is not None else []) [ - common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts, self.timezone), row_dict['category'], # category name worksheet, # sheet name ] + (['Poster Moment'] if row_dict['poster_moment'] else []) diff --git a/thrimshim/thrimshim/main.py b/thrimshim/thrimshim/main.py index 69840a0..432adf9 100644 --- a/thrimshim/thrimshim/main.py +++ b/thrimshim/thrimshim/main.py @@ -242,22 +242,25 @@ def get_row(ident): start = response['event_start'] # use tags to determine default thumbnail template - if response['thumbnail_template'] is None + if response['thumbnail_template'] is None: conn = app.db_manager.get_conn() query = """ - SELECT name, default_template + SELECT tags, default_template FROM playlists - """ + WHERE default_template IS NOT NULL + """ results = database.query(conn, query) default_templates = {} for row in results: - row = row._asdict: - if row['name'] and row['default_template']: - default_templates[row['name']] = row['default_template'] + for tag in row.tag: + default_templates[tag] = row.default_template + # since implicit tags are put at the start, with the shift tag first + # we prioritize later tags for tag in response['tags'][::-1]: if tag in default_templates: response['thumbnail_template'] = default_templates[tag] + break # pick default frame time as the middle of the video. if response['thumbnail_time'] is None: