From 3ffbefea4fce6506307f07d75b0b9da47bc8df65 Mon Sep 17 00:00:00 2001
From: Mike Lang <mikelang3000@gmail.com>
Date: Mon, 12 Aug 2024 14:51:17 +1000
Subject: [PATCH] sheetsync: Split SheetsMiddleware into a common base class
 and an events-specific subclass

---
 sheetsync/sheetsync/main.py   |   4 +-
 sheetsync/sheetsync/sheets.py | 177 ++++++++++++++++++++--------------
 2 files changed, 109 insertions(+), 72 deletions(-)

diff --git a/sheetsync/sheetsync/main.py b/sheetsync/sheetsync/main.py
index e898872..2e40e94 100644
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@@ -17,7 +17,7 @@ import common
 import common.dateutil
 from common.database import DBManager, query, get_column_placeholder
 
-from .sheets import SheetsClient, SheetsMiddleware
+from .sheets import SheetsClient, SheetsEventsMiddleware
 from .streamlog import StreamLogClient, StreamLogMiddleware
 
 sheets_synced = prom.Counter(
@@ -450,7 +450,7 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0):
 				client_secret=creds['client_secret'],
 				refresh_token=creds['refresh_token'],
 			)
-			middleware = SheetsMiddleware(
+			middleware = SheetsEventsMiddleware(
 				client,
 				config["sheet_id"],
 				config["worksheets"],
diff --git a/sheetsync/sheetsync/sheets.py b/sheetsync/sheetsync/sheets.py
index 712b05d..34746cf 100644
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@@ -82,42 +82,26 @@ class SheetsMiddleware():
 	# Expected quota usage per 100s =
 	#  (100 / RETRY_INTERVAL) * ACTIVE_SHEET_COUNT
 	#  + (100 / RETRY_INTERVAL / SYNCS_PER_INACTIVE_CHECK) * (len(worksheets) - ACTIVE_SHEET_COUNT)
-	# If playlist_worksheet is defined, add 1 to len(worksheets).
 	# For current values, this is 100/5 * 2 + 100/5/4 * 7 = 75
 
-	def __init__(self, client, sheet_id, worksheets, bustime_start, edit_url, allocate_ids=False):
+	# Maps DB column names (or general identifier, for non-DB columns) to sheet column indexes.
+	# id is required.
+	column_map = {
+		"id": NotImplemented,
+	}
+
+	# Maps column names to a function that parses that column's value.
+	# Functions take a single arg (the value to parse) and ValueError is
+	# interpreted as None.
+	# Columns missing from this map default to simply using the string value.
+	column_parsers = {}
+
+	def __init__(self, client, sheet_id, worksheets, allocate_ids=False):
 		self.client = client
 		self.sheet_id = sheet_id
 		# map {worksheet: last modify time}
 		self.worksheets = {w: 0 for w in worksheets}
-		self.bustime_start = bustime_start
-		self.edit_url = edit_url
 		self.allocate_ids = allocate_ids
-		# Maps DB column names (or general identifier, for non-DB columns) to sheet column indexes.
-		# Hard-coded for now, future work: determine this from column headers in sheet
-		self.column_map = {
-			'event_start': 0,
-			'event_end': 1,
-			'category': 2,
-			'description': 3,
-			'submitter_winner': 4,
-			'poster_moment': 5,
-			'image_links': 6,
-			'marked_for_edit': 7,
-			'notes': 8,
-			'tags': 9,
-			'video_link': 11,
-			'state': 12,
-			'edit_link': 13,
-			'error': 14,
-			'id': 15,
-		}
-		# Maps column names to a function that parses that column's value.
-		# Functions take a single arg (the value to parse) and ValueError is
-		# interpreted as None.
-		# Columns missing from this map default to simply using the string value.
-		empty_is_none = lambda v: None if v == "" else v
-		self.column_parsers = {
 			'event_start': lambda v: self.parse_bustime(v),
 			'event_end': lambda v: self.parse_bustime(v, preserve_dash=True),
 			'poster_moment': lambda v: v == '[\u2713]', # check mark
@@ -132,18 +116,6 @@ class SheetsMiddleware():
 		# tracks empty rows on the sheet for us to create new rows in
 		self.unassigned_rows = {}
 
-	def parse_bustime(self, value, preserve_dash=False):
-		"""Convert from HH:MM or HH:MM:SS format to datetime.
-		If preserve_dash=True and value is "--", returns "--"
-		as a sentinel value instead of None. "" will still result in None.
-		"""
-		if not value.strip():
-			return None
-		if value.strip() == "--":
-			return "--" if preserve_dash else None
-		bustime = common.parse_bustime(value)
-		return common.bustime_to_dt(self.bustime_start, bustime)
-
 	def pick_worksheets(self):
 		"""Returns a list of worksheets to check, which may not be the same every time
 		for quota limit reasons."""
@@ -180,7 +152,7 @@ class SheetsMiddleware():
 				# Handle rows without an allocated id
 				if row['id'] is None:
 					# Only assign a row an id if it has a start time and a description
-					if not any(row[col] for col in ["event_start", "description"]):
+					if not self.row_is_non_empty(row):
 						self.unassigned_rows.setdefault(worksheet, []).append(row["index"])
 						continue
 					# If we can't allocate ids, warn and ignore.
@@ -192,22 +164,14 @@ class SheetsMiddleware():
 					logging.info(f"Allocating id for row {worksheet!r}:{row['index']} = {row['id']}")
 					self.write_id(row)
 
-				# Set edit link if marked for editing and start/end set.
-				# This prevents accidents / clicking the wrong row and provides
-				# feedback that sheet sync is still working.
-				# Also clear it if it shouldn't be set.
-				# We do this here instead of in sync_row() because it's Sheets-specific logic
-				# that doesn't depend on the DB event in any way.
-				edit_link = self.edit_url.format(row['id']) if row['marked_for_edit'] == '[+] Marked' else ''
-				if row['edit_link'] != edit_link:
-					logging.info("Updating sheet row {} with edit link {}".format(row['id'], edit_link))
-					self.write_value(row, "edit_link", edit_link)
-					self.mark_modified(row)
-
 				all_rows.append(row)
 		is_full = sorted(worksheets) == list(self.worksheets.keys()):
 		return is_full, all_rows
 
+	def row_is_non_empty(self, row):
+		"""Returns True if row is considered to be non-empty and should have an id assigned."""
+		raise NotImplementedError
+
 	def write_id(self, row):
 		self.client.write_value(
 			self.sheet_id, row["sheet_name"],
@@ -231,22 +195,6 @@ class SheetsMiddleware():
 					value = None
 					row_dict['_parse_errors'].append("Failed to parse column {}: {}".format(column, e))
 			row_dict[column] = value
-		# As a special case, add some implicit tags to the tags column.
-		# We prepend these to make it slightly more consistent for the editor,
-		# ie. it's always DAY, CATEGORY, POSTER_MOMENT, CUSTOM
-		row_dict['tags'] = (
-			[
-				row_dict['category'], # category name
-				worksheet, # sheet name
-			] + (['Poster Moment'] if row_dict['poster_moment'] else [])
-			+ row_dict['tags']
-		)
-		# As a special case, treat an end time of "--" as equal to the start time.
-		if row_dict["event_end"] == "--":
-			row_dict["event_end"] = row_dict["event_start"]
-		# Always include row index and worksheet
-		row_dict["index"] = row_index
-		row_dict["sheet_name"] = worksheet
 		return row_dict
 
 	def write_value(self, row, key, value):
@@ -281,3 +229,92 @@ class SheetsMiddleware():
 		logging.info(f"Assigning existing id {row['id']} to empty row {worksheet!r}:{row['index']}")
 		self.write_id(row)
 		return row
+
+
+class SheetsEventsMiddleware(SheetsMiddleware):
+	"""Sheets middleware for event rows: parses bustimes, adds implicit tags, maintains edit links."""
+	column_map = {
+		'event_start': 0,
+		'event_end': 1,
+		'category': 2,
+		'description': 3,
+		'submitter_winner': 4,
+		'poster_moment': 5,
+		'image_links': 6,
+		'marked_for_edit': 7,
+		'notes': 8,
+		'tags': 9,
+		'video_link': 11,
+		'state': 12,
+		'edit_link': 13,
+		'error': 14,
+		'id': 15,
+	}
+
+	def __init__(self, client, sheet_id, worksheets, bustime_start, edit_url, allocate_ids=False):
+		super().__init__(client, sheet_id, worksheets, allocate_ids)
+		self.bustime_start = bustime_start
+		self.edit_url = edit_url
+
+		# column parsers are defined here so they can reference self
+		empty_is_none = lambda v: None if v == "" else v
+		self.column_parsers = {
+			'event_start': lambda v: self.parse_bustime(v),
+			'event_end': lambda v: self.parse_bustime(v, preserve_dash=True),
+			'poster_moment': lambda v: v == '[\u2713]', # check mark
+			'image_links': lambda v: [link.strip() for link in v.split()] if v.strip() else [],
+			'tags': lambda v: [tag.strip() for tag in v.split(',') if tag.strip()],
+			'id': lambda v: v if v.strip() else None,
+			'error': empty_is_none,
+			'video_link': empty_is_none,
+		}
+
+	def parse_bustime(self, value, preserve_dash=False):
+		"""Convert from HH:MM or HH:MM:SS format to datetime.
+		If preserve_dash=True and value is "--", returns "--"
+		as a sentinel value instead of None. "" will still result in None.
+		"""
+		if not value.strip():
+			return None
+		if value.strip() == "--":
+			return "--" if preserve_dash else None
+		bustime = common.parse_bustime(value)
+		return common.bustime_to_dt(self.bustime_start, bustime)
+
+	def row_is_non_empty(self, row):
+		"""A row counts as non-empty if it has a start time or a description."""
+		return any(row[col] for col in ["event_start", "description"])
+
+	def parse_row(self, worksheet, row_index, row):
+		"""Parse a raw sheet row via the base class, then apply the events-specific fixups below."""
+		row_dict = super().parse_row(worksheet, row_index, row)
+		# As a special case, add some implicit tags to the tags column.
+		# We prepend these to make it slightly more consistent for the editor,
+		# ie. it's always DAY, CATEGORY, POSTER_MOMENT, CUSTOM
+		row_dict['tags'] = (
+			[
+				row_dict['category'], # category name
+				worksheet, # sheet name
+			] + (['Poster Moment'] if row_dict['poster_moment'] else [])
+			+ row_dict['tags']
+		)
+		# As a special case, treat an end time of "--" as equal to the start time.
+		if row_dict["event_end"] == "--":
+			row_dict["event_end"] = row_dict["event_start"]
+		# Always include row index and worksheet
+		row_dict["index"] = row_index
+		row_dict["sheet_name"] = worksheet
+
+		# Set edit link if marked for editing, and clear it if it shouldn't be set.
+		# This prevents accidents / clicking the wrong row and provides
+		# feedback that sheet sync is still working.
+		# We do this here instead of in sync_row() because it's Sheets-specific logic
+		# that doesn't depend on the DB event in any way.
+		# Rows with no id yet are skipped: there is nothing to link to until one is allocated.
+		edit_link = self.edit_url.format(row_dict['id']) if row_dict['marked_for_edit'] == '[+] Marked' else ''
+		if row_dict['id'] is not None and row_dict['edit_link'] != edit_link:
+			logging.info("Updating sheet row {} with edit link {}".format(row_dict['id'], edit_link))
+			self.write_value(row_dict, "edit_link", edit_link)
+			self.mark_modified(row_dict)
+
+		return row_dict