Bug fixes and improvements in response to comments on the PR

pull/449/head
Christopher Usher 3 weeks ago
parent c4e98f4dca
commit d6f976ac0f

@ -1,5 +1,7 @@
import datetime import datetime
import urllib
import zoneinfo
from common import dateutil from common import dateutil
from common.requests import InstrumentedSession from common.requests import InstrumentedSession
@ -9,75 +11,93 @@ requests = InstrumentedSession()
def parse_shift_time(time_str, timeout=5):
    """
    Parse a single start or end time from a shift definition.

    A value whose URL scheme is http or https is fetched and the stripped
    response body is parsed as a timestamp; any other non-empty value is
    parsed directly as a timestamp.

    Arguments:
    time_str -- a timestamp string, an http(s) URL, or None / empty string
    timeout -- timeout passed to the HTTP request when time_str is a URL

    Returns None for a missing (falsy) value, otherwise the result of
    dateutil.parse.

    Errors from the HTTP request (including a non-2xx status) and from
    timestamp parsing are not caught here; they propagate to the caller.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    import urllib.parse

    if not time_str:
        return None

    if urllib.parse.urlparse(time_str).scheme in ('http', 'https'):
        # metric_name labels the request on the instrumented session
        # NOTE(review): presumably used for request metrics -- confirm
        resp = requests.get(time_str, timeout=timeout, metric_name='get_shift_time')
        resp.raise_for_status()
        return dateutil.parse(resp.text.strip())

    return dateutil.parse(time_str)
def parse_shifts(shifts):
    """
    Parse a shifts definition.

    The shifts definition is a mapping with three keys: repeating, one_off
    and timezone.

    The repeating entry is a list of shift definitions.
    Each of these is a sequence consisting of the name of the shift,
    the starting hour of the shift in local time, and the ending hour in local time.
    Repeating shifts extending across midnight can be handled by using two
    shifts with the same name.
    For example:
    [['Night', 0, 6],
     ['Day', 6, 18],
     ['Night', 18, 24]]
    The repeating entry is passed through unchanged.

    The one_off entry is a list of shift definitions.
    Each of these is a sequence consisting of the name of the shift,
    the start of the shift, and the end of the shift.
    A start or end time can be a timestamp, a URL or None.
    If it is a URL, the URL will be queried for a timestamp.
    If no timezone info is provided the timestamp will be assumed to be UTC.
    If the start is None, it is replaced by the earliest possible datetime;
    if the end is None, it is replaced by the latest possible datetime.
    For example:
    [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
     ['End Only', '2024-01-02T00:00:00', None],
     ['URL', 'http://example.com/start.html', '2024-01-01T00:00:00'],
     ['Both None', None, None]]
    would be parsed as (shown here as timestamps; the parsed values are
    whatever parse_shift_time returns, or datetime.datetime.min / max):
    [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
     ['End Only', '2024-01-02T00:00:00', '9999-12-31T23:59:59.999999'],
     ['URL', '2023-12-31T12:00:00', '2024-01-01T00:00:00'],
     ['Both None', '0001-01-01T00:00:00', '9999-12-31T23:59:59.999999']]

    The timezone entry is a string that the zoneinfo package can interpret
    as a timezone. An existing datetime.tzinfo instance is also accepted and
    used as-is, so an already parsed definition can be parsed again.

    One-off shifts override repeating shifts.
    In the case of overlapping shifts, the first shift in the list takes precedence.

    Returns a new dict with the keys repeating, one_off and timezone.
    Raises KeyError if one of the three keys is missing; errors from
    parse_shift_time and zoneinfo.ZoneInfo propagate to the caller.
    """
    one_off = []
    for name, start, end in shifts['one_off']:
        start = parse_shift_time(start)
        end = parse_shift_time(end)
        # an open-ended shift extends to the limit of representable datetimes
        if start is None:
            start = datetime.datetime.min
        if end is None:
            end = datetime.datetime.max
        one_off.append([name, start, end])

    timezone = shifts['timezone']
    if not isinstance(timezone, datetime.tzinfo):
        timezone = zoneinfo.ZoneInfo(timezone)

    return {
        'repeating': shifts['repeating'],
        'one_off': one_off,
        'timezone': timezone,
    }
def calculate_shift(time, shifts, timezone=None):
    """
    Calculate what shift a time falls in.

    Arguments:
    time -- a datetime.datetime instance, treated as UTC (any tzinfo it
        carries is replaced by UTC), or None
    shifts -- the output from parse_shifts
    timezone -- optional tzinfo used for the repeating shifts; defaults to
        shifts['timezone']

    Returns '' if time is None, the name of the first matching one-off shift,
    else the name of the first matching repeating shift, else None.
    """
    # NOTE(review): the sheetsync caller only drops a None result, so the ''
    # returned here would still be emitted as a tag -- confirm whether this
    # should be None instead.
    if time is None:
        return ''

    # one-off shifts take precedence over repeating shifts
    for name, start, end in shifts['one_off']:
        if start <= time < end:
            return name

    if timezone is None:
        timezone = shifts['timezone']

    # since shifts are based on local times we have to worry about timezones for once
    local_time = time.replace(tzinfo=datetime.timezone.utc).astimezone(timezone)
    # use a fractional hour to allow for non-integer start and end hours
    hour = local_time.hour + local_time.minute / 60 + local_time.second / 3600
    for name, start_hour, end_hour in shifts['repeating']:
        if start_hour <= hour < end_hour:
            return name

    return None

@ -187,14 +187,15 @@
shift_defs:: { shift_defs:: {
repeating: [ repeating: [
["Zeta Shift", 0, 6], ["Zeta Shift", 0, 6],
["Dawn Guard", 6, 12], ["Dawn Guard", 6, 12],
["Alpha Flight", 12, 18], ["Alpha Flight", 12, 18],
["Night Watch", 18, 24], ["Night Watch", 18, 24],
], ],
one_off: [ one_off: [
["Tech Test", null, $.bustime_start], ["Tech Test", null, $.bustime_start],
["Omega Shift", "http://example.com/omega_start.html", null], ["Omega Shift", "http://example.com/omega_start.html", null],
] ],
timezone: $.timezone,
}, },
shifts:: std.manifestJson($.shift_defs), shifts:: std.manifestJson($.shift_defs),
@ -559,7 +560,6 @@
image: $.get_image("sheetsync"), image: $.get_image("sheetsync"),
// Args for the sheetsync // Args for the sheetsync
command: [ command: [
"--timezone", $.timezone,
"--shifts", $.shifts, "--shifts", $.shifts,
"--backdoor-port", std.toString($.backdoor_port), "--backdoor-port", std.toString($.backdoor_port),
$.db_connect, $.db_connect,

@ -157,7 +157,7 @@ CREATE TABLE playlists (
first_event_id TEXT, first_event_id TEXT,
last_event_id TEXT, last_event_id TEXT,
-- name of the thumbnail template to be applied by default to this tag -- name of the thumbnail template to be applied by default to this tag
default_template TEXT NOT NULL DEFAULT '' default_template TEXT
); );
-- This table records time series data gleaned from the bus cam (right now, just the odometer). -- This table records time series data gleaned from the bus cam (right now, just the odometer).

@ -12,7 +12,7 @@ setup(
"psycopg2", "psycopg2",
"python-dateutil", "python-dateutil",
"requests", "requests",
"tzdata", "tzdata",
"wubloader-common", "wubloader-common",
], ],
) )

@ -2,7 +2,6 @@
import json import json
import logging import logging
import signal import signal
import zoneinfo
from collections import defaultdict from collections import defaultdict
from urllib.parse import urlparse from urllib.parse import urlparse
@ -277,11 +276,10 @@ class EventsSync(SheetSync):
"category", "category",
} }
def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, timezone=None, shifts=None): def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, shifts=None):
super().__init__(name, middleware, stop, dbmanager, reverse_sync) super().__init__(name, middleware, stop, dbmanager, reverse_sync)
self.media_dir = media_dir self.media_dir = media_dir
self.media_downloads = None if media_dir is None else {} self.media_downloads = None if media_dir is None else {}
self.timezone = timezone
self.shifts = shifts self.shifts = shifts
@ -417,14 +415,22 @@ class PlaylistsSync(SheetSync):
event_id: The id of the streamlog event to sync event_id: The id of the streamlog event to sync
""", """,
) )
@argh.arg('--timezone', help="Local timezone for determining shift times")
@argh.arg('--shifts', type=json.loads, help=""" @argh.arg('--shifts', type=json.loads, help="""
Shift definitions in JSON form. Shift definitions in JSON form.
Always present: Always present:
repeating: a list of repeating shifts. Each of these consist of a sequence of shift name, start hour and end hour. The start and end hours are in local time. repeating: a list of repeating shifts.
one_off: a list of non-repeating shifts. Each of these consist of a sequence of shift name, start and end. A start or end time can be a string repersenting timestamp or a URL or null. If it is a URL, the URL will be queried for a timestamp. If no timezone info is provided the timestamp will be assumed to be UTC. If the start time is None, then the start will be assumed to be the earliest possible datetime; if the end is None, it will be assumed to be the oldest possible datetime. If both the start and end are None, the shift will be ignored. Each of these consist of a sequence of shift name, start hour and end hour.
The start and end hours are in local time.
one_off: a list of non-repeating shifts.
Each of these consist of a sequence of shift name, start and end.
A start or end time can be a string repersenting timestamp or a URL or null.
If it is a URL, the URL will be queried for a timestamp.
If no timezone info is provided the timestamp will be assumed to be UTC.
If the start time is None, then the start will be assumed to be the earliest possible datetime;
if the end is None, it will be assumed to be the oldest possible datetime.
timezone: a string interpretable by the zoneinfo package as a timezone
""") """)
def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None, timezone=None): def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None):
""" """
Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet
to the DB and outputs from the DB to the sheet. to the DB and outputs from the DB to the sheet.
@ -444,6 +450,9 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=
logging.info("Starting up") logging.info("Starting up")
if shifts is None:
shifts = {'repeating':[], 'one_off':[], 'timezone':'UTC'}
dbmanager = DBManager(dsn=dbconnect) dbmanager = DBManager(dsn=dbconnect)
while True: while True:
try: try:
@ -520,7 +529,6 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=
}[config["type"]] }[config["type"]]
sync_class_kwargs = {} sync_class_kwargs = {}
if config["type"] == "events": if config["type"] == "events":
sync_class_kwargs["timezone"] = zoneinfo.ZoneInfo(timezone)
sync_class_kwargs["shifts"] = shifts sync_class_kwargs["shifts"] = shifts
if config["type"] == "events" and config.get("download_media", False): if config["type"] == "events" and config.get("download_media", False):
sync_class_kwargs["media_dir"] = media_dir sync_class_kwargs["media_dir"] = media_dir

@ -1,6 +1,7 @@
import logging import logging
import uuid import uuid
import zoneinfo
from monotonic import monotonic from monotonic import monotonic
@ -273,6 +274,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
self.bustime_start = bustime_start self.bustime_start = bustime_start
self.edit_url = edit_url self.edit_url = edit_url
# fallback to no shifts if there is a shift parsing error
self.latest_shifts = {'repeating':[], 'one_off':[], 'timezone':zoneinfo.ZoneInfo('UTC')}
# column parsers are defined here so they can reference self # column parsers are defined here so they can reference self
self.column_parsers = { self.column_parsers = {
'event_start': self.parse_bustime, 'event_start': self.parse_bustime,
@ -294,7 +298,11 @@ class SheetsEventsMiddleware(SheetsMiddleware):
def get_rows(self): def get_rows(self):
# only need to update the shifts once per sync # only need to update the shifts once per sync
self.latest_shifts = common.shifts.parse_shifts(self.shifts) try:
self.latest_shifts = common.shifts.parse_shifts(self.shifts)
except Exception as e:
logging.error('Error parsing shifts with {}. Using previous shifts definition.'.format(e))
return super().get_rows() return super().get_rows()
def parse_bustime(self, value, preserve_dash=False): def parse_bustime(self, value, preserve_dash=False):
@ -331,9 +339,10 @@ class SheetsEventsMiddleware(SheetsMiddleware):
# This is only needed for full events (not the archive sheet), # This is only needed for full events (not the archive sheet),
# so only do it if we had a tags column in the first place. # so only do it if we had a tags column in the first place.
if 'tags' in row_dict: if 'tags' in row_dict:
shift_tag = common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts)
row_dict['tags'] = ( row_dict['tags'] = (
([shift_tag] if shift_tag is not None else [])
[ [
common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts, self.timezone),
row_dict['category'], # category name row_dict['category'], # category name
worksheet, # sheet name worksheet, # sheet name
] + (['Poster Moment'] if row_dict['poster_moment'] else []) ] + (['Poster Moment'] if row_dict['poster_moment'] else [])

@ -242,22 +242,25 @@ def get_row(ident):
start = response['event_start'] start = response['event_start']
# use tags to determine default thumbnail template # use tags to determine default thumbnail template
if response['thumbnail_template'] is None if response['thumbnail_template'] is None:
conn = app.db_manager.get_conn() conn = app.db_manager.get_conn()
query = """ query = """
SELECT name, default_template SELECT tags, default_template
FROM playlists FROM playlists
""" WHERE default_template IS NOT NULL
"""
results = database.query(conn, query) results = database.query(conn, query)
default_templates = {} default_templates = {}
for row in results: for row in results:
row = row._asdict: for tag in row.tag:
if row['name'] and row['default_template']: default_templates[tag] = row.default_template
default_templates[row['name']] = row['default_template']
# since implicit tags are put at the start, with the shift tag first
# we prioritize later tags
for tag in response['tags'][::-1]: for tag in response['tags'][::-1]:
if tag in default_templates: if tag in default_templates:
response['thumbnail_template'] = default_templates[tag] response['thumbnail_template'] = default_templates[tag]
break
# pick default frame time as the middle of the video. # pick default frame time as the middle of the video.
if response['thumbnail_time'] is None: if response['thumbnail_time'] is None:

Loading…
Cancel
Save