Bug fixes and improvements in response to comments on the PR

11 months ago · d6f976ac0f
parent c4e98f4dca
commit d6f976ac0f
7 changed files with 127 additions and 87 deletions
--- a/common/common/shifts.py
+++ b/common/common/shifts.py
@ -1,5 +1,7 @@

 import datetime
+import urllib
+import zoneinfo

 from common import dateutil
 from common.requests import InstrumentedSession
@ -9,75 +11,93 @@ requests = InstrumentedSession()


 def parse_shift_time(time_str, timeout=5):
-    """Parse times in the shift definition.
-        
-       The parser first tries to parse a string as a datetime before trying the string as a URL to fetch a timestamp from."""
-    if not time_str:
-        return None
-    try:
-        return dateutil.parse(time_str)
-    except ValueError:
-        try:
-            resp = requests.get(time_str, timeout=timeout, metric_name='get_shift_time')
-            resp.raise_for_status()
-            return dateutil.parse(resp.text.strip())
-        except Exception:
-            return None
+	"""
+	Parse times in the shift definition.
+		
+	The parser first tries to parse a string as a URL to fetch a timestamp from before trying to parse it as a timestamp.
+	"""
+	if not time_str:
+		return None
+	if urllib.parse.urlparse(time_str).scheme in ('http', 'https'):
+		resp = requests.get(time_str, timeout=timeout)
+		resp.raise_for_status()
+		return dateutil.parse(resp.text.strip())
+	else:
+		return dateutil.parse(time_str)


 def parse_shifts(shifts):
-    """Parse a shifts definition
-
-       The shifts definition is two entry mappable with two keys, repeating and one-off.
-
-       The repeating shifts entry is a list of shift definition. Each of these is a sequence consisting of the name of the shift, the starting hour of the shift in local time, and the ending hour in local time. Repeating shifts extending across midnight can be handled by using two shifts with the same name. For example:
-        [['Night', 0, 6],
-         ['Day', 6, 18],
-         ['Night', 18, 24]]
-
-       The one-off shifts entry is a list of shift definitions. Each of these is a sequence consisting of the name of the shift, the start the shift, and the end of the shift. A start or end time can be a timestamp, a URL or None. If it is a URL, the URL will be queried for a timestamp. If no timezone info is provided the timestamp will be assumed to be UTC. If the start time is None, then the start will be assumed to be the earliest possible datetime; if the end is None, it will be assumed to be the oldest possible datetime. If both the start and end are None, the shift will be ignored. For example:
-        [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
-         ['End Only', '2024-01-02T00:00:00', None],
-         ['URL', 'http://example.com/start.html', '2024-01-01T00:00:00'],
-         ['Both None', None, None]]
-        would be parsed as:
-        [['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
-         ['Start Only', '2024-01-02T00:00:00', '9999-12-31T23:59:59.999999'],
-         ['URL', '2023-12-31T12:00:00', '2024-01-01T00:00:00']]
-         """
-    new_shifts = {'repeating':shifts['repeating'], 'one_off':[]}
-    for shift in shifts['one_off']:
-        name, start, end = shift
-        start = parse_shift_time(start)
-        end = parse_shift_time(end)
-        if (start is None) and (end is None):
-            continue
-        if start is None:
-            start = datetime.datetime.min
-        if end is None:
-            end = datetime.datetime.max
-        new_shifts['one_off'].append([name, start, end])       
-    return new_shifts
+	"""
+	Parse a shifts definition.
+
+	The shifts definition is three entry mappable with two keys, repeating, one_off and timezone.
+
+	The repeating shifts entry is a list of shift definition.
+	Each of these is a sequence consisting of the name of the shift,
+	the starting hour of the shift in local time, and the ending hour in local time.
+	Repeating shifts extending across midnight can be handled by using two shifts with the same name.
+	For example:
+	[['Night', 0, 6],
+	 ['Day', 6, 18],
+	 ['Night', 18, 24]]
+
+	The one-off shifts entry is a list of shift definitions.
+	Each of these is a sequence consisting of the name of the shift, the start the shift,
+	and the end of the shift.
+	A start or end time can be a timestamp, a URL or None.
+	If it is a URL, the URL will be queried for a timestamp.
+	If no timezone info is provided the timestamp will be assumed to be UTC.
+	If the start time is None, then the start will be assumed to be the earliest possible datetime;
+	if the end is None, it will be assumed to be the oldest possible datetime.
+	For example:
+	[['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
+	 ['End Only', '2024-01-02T00:00:00', None],
+	 ['URL', 'http://example.com/start.html', '2024-01-01T00:00:00'],
+	 ['Both None', None, None]]
+	would be parsed as:
+	[['Full', '2024-01-01T00:00:00', '2024-01-02T00:00:00'],
+	 ['End Only', '2024-01-02T00:00:00', '9999-12-31T23:59:59.999999'],
+	 ['URL', '2023-12-31T12:00:00', '2024-01-01T00:00:00'],
+	 ['Both None', '0001-01-01T00:00:00', '9999-12-31T23:59:59.999999']]
+
+	The timezone entry is a string that the zoneinfo package can interpret as a timezone
+
+	One-off shifts override repeating shifts.
+	In the case of overlapping shifts, the first shift in the list takes precedence. 
+	"""
+	new_shifts = {'repeating':shifts['repeating'], 'one_off':[]}
+	for shift in shifts['one_off']:
+		name, start, end = shift
+		start = parse_shift_time(start)
+		end = parse_shift_time(end)
+		if start is None:
+			start = datetime.datetime.min
+		if end is None:
+			end = datetime.datetime.max
+		new_shifts['one_off'].append([name, start, end])
+	new_shifts['timezone'] = zoneinfo.ZoneInfo(shifts['timezone'])
+	return new_shifts


 def calculate_shift(time, shifts, timezone):
-    """Calculate what shift a time falls in. 
-        
-       time is a datetime, shifts the output from parse_shifts and timezone a 
-    """
-    if not time:
-        return ''
-    
-    for shift in shifts['one_off']:
-        print(time, shift[1], shift[2])
-        if shift[1] <= time < shift[2]:
-            return shift[0]
-        
-    #since shifts are based on local times we have to worry about timezones for once
-    local_time = time.replace(tzinfo=UTC).astimezone(timezone)
-    # do a more involved calculation to allow for non-integer start and end hours
-    time_diff = local_time - datetime.datetime(local_time.year, local_time.month, local_time.day, tzinfo=timezone)
-    hour = time_diff / datetime.timedelta(hours=1)
-    for shift in shifts['repeating']:
-        if shift[1] <= hour < shift[2]:
-            return shift[0]
+	"""
+	Calculate what shift a time falls in. 
+
+	Arguments:
+	time -- a datetime.datetime instance
+	shifts -- the output from parse_shifts
+	"""
+	if time is not None:
+		return ''
+	
+	for shift in shifts['one_off']:
+		if shift[1] <= time < shift[2]:
+			return shift[0]
+		
+	#since shifts are based on local times we have to worry about timezones for once
+	local_time = time.replace(tzinfo=UTC).astimezone(timezone)
+	# do a more involved calculation to allow for non-integer start and end hours
+	hour = local_time.hour + local_time.minute / 60 + local_time.second / 3600
+	for shift in shifts['repeating']:
+		if shift[1] <= hour < shift[2]:
+			return shift[0]
--- a/docker-compose.jsonnet
+++ b/docker-compose.jsonnet
@ -187,14 +187,15 @@
  shift_defs:: {
                repeating: [
                            ["Zeta Shift", 0, 6],
-   					        ["Dawn Guard", 6, 12],
+                            ["Dawn Guard", 6, 12],
                            ["Alpha Flight", 12, 18],
-							["Night Watch", 18, 24],
+                            ["Night Watch", 18, 24],
                           ],
-			    one_off: [
+                one_off: [
                          ["Tech Test", null, $.bustime_start],
-				    	  ["Omega Shift", "http://example.com/omega_start.html", null],
-                         ]
+                          ["Omega Shift", "http://example.com/omega_start.html", null],
+                         ],
+                timezone: $.timezone,
               },
  shifts:: std.manifestJson($.shift_defs),

@ -559,7 +560,6 @@
      image: $.get_image("sheetsync"),
      // Args for the sheetsync
      command: [
-		"--timezone", $.timezone,
        "--shifts", $.shifts,
        "--backdoor-port", std.toString($.backdoor_port),
        $.db_connect,
--- a/postgres/schema.sql
+++ b/postgres/schema.sql
@ -157,7 +157,7 @@ CREATE TABLE playlists (
 	first_event_id TEXT,
 	last_event_id TEXT,
 	-- name of the thumbnail template to be applied by default to this tag
-	default_template TEXT NOT NULL DEFAULT ''
+	default_template TEXT
 );

 -- This table records time series data gleaned from the bus cam (right now, just the odometer).
--- a/sheetsync/setup.py
+++ b/sheetsync/setup.py
@ -12,7 +12,7 @@ setup(
 		"psycopg2",
 		"python-dateutil",
 		"requests",
-        "tzdata",
+		"tzdata",
 		"wubloader-common",
 	],
 )
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@ -2,7 +2,6 @@
 import json
 import logging
 import signal
-import zoneinfo
 from collections import defaultdict
 from urllib.parse import urlparse

@ -277,11 +276,10 @@ class EventsSync(SheetSync):
 		"category",
 	}

-	def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, timezone=None, shifts=None):
+	def __init__(self, name, middleware, stop, dbmanager, reverse_sync=False, media_dir=None, shifts=None):
 		super().__init__(name, middleware, stop, dbmanager, reverse_sync)
 		self.media_dir = media_dir
 		self.media_downloads = None if media_dir is None else {}
-		self.timezone = timezone
 		self.shifts = shifts
 		

@ -417,14 +415,22 @@ class PlaylistsSync(SheetSync):
 			event_id: The id of the streamlog event to sync
 	""",
 )
-@argh.arg('--timezone', help="Local timezone for determining shift times")
@argh.arg('--shifts', type=json.loads, help="""
 	Shift definitions in JSON form.
 	Always present:
-		repeating: a list of repeating shifts. Each of these consist of a sequence of shift name, start hour and end hour. The start and end hours are in local time.
-		one_off: a list of non-repeating shifts. Each of these consist of a sequence of shift name, start and end. A start or end time can be a string repersenting timestamp or a URL or null. If it is a URL, the URL will be queried for a timestamp. If no timezone info is provided the timestamp will be assumed to be UTC. If the start time is None, then the start will be assumed to be the earliest possible datetime; if the end is None, it will be assumed to be the oldest possible datetime. If both the start and end are None, the shift will be ignored. 
+		repeating: a list of repeating shifts.
+			Each of these consist of a sequence of shift name, start hour and end hour.
+			The start and end hours are in local time.
+		one_off: a list of non-repeating shifts.
+			Each of these consist of a sequence of shift name, start and end.
+			A start or end time can be a string repersenting timestamp or a URL or null.
+			If it is a URL, the URL will be queried for a timestamp.
+			If no timezone info is provided the timestamp will be assumed to be UTC.
+			If the start time is None, then the start will be assumed to be the earliest possible datetime;
+			if the end is None, it will be assumed to be the oldest possible datetime. 
+		timezone: a string interpretable by the zoneinfo package as a timezone
 	""")
-def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None, timezone=None):
+def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=".", shifts=None):
 	"""
 	Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet
 	to the DB and outputs from the DB to the sheet.
@ -444,6 +450,9 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=

 	logging.info("Starting up")

+	if shifts is None:
+		shifts = {'repeating':[], 'one_off':[], 'timezone':'UTC'}
+
 	dbmanager = DBManager(dsn=dbconnect)
 	while True:
 		try:
@ -520,7 +529,6 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0, media_dir=
 		}[config["type"]]
 		sync_class_kwargs = {}
 		if config["type"] == "events":
-			sync_class_kwargs["timezone"] = zoneinfo.ZoneInfo(timezone)
 			sync_class_kwargs["shifts"] = shifts
 		if config["type"] == "events" and config.get("download_media", False):
 			sync_class_kwargs["media_dir"] = media_dir
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@ -1,6 +1,7 @@

 import logging
 import uuid
+import zoneinfo

 from monotonic import monotonic

@ -273,6 +274,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
 		self.bustime_start = bustime_start
 		self.edit_url = edit_url

+		# fallback to no shifts if there is a shift parsing error
+		self.latest_shifts = {'repeating':[], 'one_off':[], 'timezone':zoneinfo.ZoneInfo('UTC')}
+
 		# column parsers are defined here so they can reference self
 		self.column_parsers = {
 			'event_start': self.parse_bustime,
@ -294,7 +298,11 @@ class SheetsEventsMiddleware(SheetsMiddleware):

 	def get_rows(self):
 		# only need to update the shifts once per sync
-		self.latest_shifts = common.shifts.parse_shifts(self.shifts)
+		try:
+			self.latest_shifts = common.shifts.parse_shifts(self.shifts)
+		except Exception as e:
+			logging.error('Error parsing shifts with {}. Using previous shifts definition.'.format(e))
+
 		return super().get_rows()

 	def parse_bustime(self, value, preserve_dash=False):
@ -331,9 +339,10 @@ class SheetsEventsMiddleware(SheetsMiddleware):
 		# This is only needed for full events (not the archive sheet),
 		# so only do it if we had a tags column in the first place.
 		if 'tags' in row_dict:
+			shift_tag = common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts)
 			row_dict['tags'] = (
+				([shift_tag] if shift_tag is not None else [])
 				[
-					common.shifts.calculate_shift(row_dict['event_start'], self.current_shifts, self.timezone), 
 					row_dict['category'], # category name
 					worksheet, # sheet name
 				] + (['Poster Moment'] if row_dict['poster_moment'] else [])
--- a/thrimshim/thrimshim/main.py
+++ b/thrimshim/thrimshim/main.py
@ -242,22 +242,25 @@ def get_row(ident):
 		start = response['event_start']

 		# use tags to determine default thumbnail template
-		if response['thumbnail_template'] is None
+		if response['thumbnail_template'] is None:
 			conn = app.db_manager.get_conn()
 			query = """
-				SELECT name, default_template
+				SELECT tags, default_template
 				FROM playlists
-					"""
+				WHERE default_template IS NOT NULL
+			"""
 			results = database.query(conn, query)
 			default_templates = {}
 			for row in results:
-				row = row._asdict:
-				if row['name'] and row['default_template']:
-					default_templates[row['name']] = row['default_template']
+				for tag in row.tag:
+					default_templates[tag] = row.default_template

+			# since implicit tags are put at the start, with the shift tag first
+			# we prioritize later tags  
 			for tag in response['tags'][::-1]:
 				if tag in default_templates:
 					response['thumbnail_template'] = default_templates[tag]
+					break
 	
 		# pick default frame time as the middle of the video.
 		if response['thumbnail_time'] is None: