sheetsync: Deal with reverse syncing properly when not all events are in the list of worksheets

This is important because archive events should not be reversed. We only want to create new rows when the row's intended worksheet is in our list of worksheets we sync.
1 year ago · eeffeeed10
parent 96181fd875
commit eeffeeed10
4 changed files with 32 additions and 19 deletions
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@ -112,7 +112,7 @@ class SheetSync(object):
 				db_rows = self.get_db_rows()
 				seen = set()

-				is_full, sheet_rows = self.middleware.get_rows()
+				worksheets, sheet_rows = self.middleware.get_rows()
 				for row in sheet_rows:
 					if row['id'] in seen:
 						self.logger.error("Duplicate id {}, skipping".format(row['id']))
@ -120,16 +120,14 @@ class SheetSync(object):
 					seen.add(row['id'])
 					self.sync_row(row, db_rows.get(row['id']))

-				if is_full:
-					# Find rows that were not in the sheet.
-					# Only do this if we did a full sync, otherwise things might be missing
-					# simply because they're in a worksheet we didn't sync.
-					missing = [
-						r for id, r in db_rows.items()
-						if id not in seen
-					]
-					for db_row in missing:
-						self.sync_row(None, db_row)
+				# Find rows that were not in the sheet, that were expected to be in that sheet.
+				missing = [
+					r for id, r in db_rows.items()
+					if id not in seen
+					and self.middleware.row_was_expected(r, worksheets)
+				]
+				for db_row in missing:
+					self.sync_row(None, db_row)

 			except Exception as e:
 				# for HTTPErrors, http response body includes the more detailed error
--- a/sheetsync/sheetsync/middleware.py
+++ b/sheetsync/sheetsync/middleware.py
@ -13,12 +13,20 @@ class Middleware:
 				is still required.
 			_parse_errors: A list of error messages encountered when parsing, to be surfaced to the
 				user if possible.
-		In addition to the list of dicts, should return an "is_full" boolean which is True
-		if all rows were fetched or False if only some subset was fetched (eg. for quota management reasons).
-		Returns (is_full, rows).
+		In addition to the list of dicts, should return a list of worksheets fetched from,
+		which is then passed to row_was_expected().
+		Returns (worksheets, rows).
 		"""
 		raise NotImplementedError

+	def row_was_expected(self, db_row, worksheets):
+		"""Given a database row and list of worksheets from get_rows(), return whether
+		the given row should have been present in the returned rows, ie. if we expected
+		to find it on one of those worksheets."""
+		# Default to the common case, which is that we always return all data
+		# so the row should always be expected.
+		return True
+
 	def write_value(self, row, key, value):
 		"""Write key=value to the given row. Takes the full row object so any identifying info
 		can be read from it as needed."""
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@ -103,8 +103,7 @@ class SheetsMiddleware(Middleware):
 					self.write_id(row)

 				all_rows.append(row)
-		is_full = sorted(worksheets) == list(self.worksheets.keys())
-		return is_full, all_rows
+		return worksheets, all_rows

 	def row_is_non_empty(self, row):
 		"""Returns True if row is considered to be non-empty and should have an id assigned."""
@ -218,6 +217,11 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware):
 		"show_in_description": ENCODE_CHECKMARK,
 	}

+	def row_was_expected(self, db_row, worksheets):
+		# Database does not record a worksheet for playlists, we assume there's only one
+		# sheet and so it should always be there.
+		return True
+
 	def row_is_non_empty(self, row):
 		return row["tags"] is not None

@ -284,6 +288,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
 		bustime = common.dt_to_bustime(self.bustime_start, value)
 		return common.format_bustime(bustime, round="minute")

+	def row_was_expected(self, db_row, worksheets):
+		return db_row.sheet_name in worksheets
+
 	def row_is_non_empty(self, row):
 		return any(row[col] for col in ["event_start", "description"])

--- a/sheetsync/sheetsync/streamlog.py
+++ b/sheetsync/sheetsync/streamlog.py
@ -71,7 +71,7 @@ class StreamLogPlaylistsMiddleware(Middleware):
 				"first_event_id": None, # TODO missing in StreamLog
 				"last_event_id": None, # TODO missing in StreamLog
 			})
-		return True, rows
+		return None, rows

 	# writing intentionally not implemented

@ -124,8 +124,8 @@ class StreamLogEventsMiddleware(Middleware):
 			# Malformed rows can be skipped, represented as a None result
 			if row is not None:
 				all_rows.append(row)
-		# There's no worksheet concept here so we always return a full sync.
-		return True, all_rows
+		# There's no worksheet concept here so just return None for worksheets.
+		return None, all_rows

 	def parse_row(self, row):
 		output = {}