sheetsync: Deal with reverse syncing properly when not all events are in the list of worksheets

This is important because archive events should not be reverse synced. We only want to create new rows when the row's intended worksheet is in the list of worksheets we sync.
11 months ago · eeffeeed10
parent 96181fd875
commit eeffeeed10
4 changed files with 32 additions and 19 deletions
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@ -112,7 +112,7 @@ class SheetSync(object):
 				db_rows = self.get_db_rows()
 				seen = set()
-				is_full, sheet_rows = self.middleware.get_rows()
+				worksheets, sheet_rows = self.middleware.get_rows()
 				for row in sheet_rows:
 					if row['id'] in seen:
 						self.logger.error("Duplicate id {}, skipping".format(row['id']))
@ -120,16 +120,14 @@ class SheetSync(object):
 					seen.add(row['id'])
 					self.sync_row(row, db_rows.get(row['id']))
-				if is_full:
+				# Find rows that were not in the sheet, that were expected to be in that sheet.
-					# Find rows that were not in the sheet.
+				missing = [
-					# Only do this if we did a full sync, otherwise things might be missing
+					r for id, r in db_rows.items()
-					# simply because they're in a worksheet we didn't sync.
+					if id not in seen
-					missing = [
+					and self.middleware.row_was_expected(r, worksheets)
-						r for id, r in db_rows.items()
+				]
-						if id not in seen
+				for db_row in missing:
-					]
+					self.sync_row(None, db_row)
 					for db_row in missing:
 						self.sync_row(None, db_row)
 			except Exception as e:
 				# for HTTPErrors, http response body includes the more detailed error
--- a/sheetsync/sheetsync/middleware.py
+++ b/sheetsync/sheetsync/middleware.py
@ -13,12 +13,20 @@ class Middleware:
 				is still required.
 			_parse_errors: A list of error messages encountered when parsing, to be surfaced to the
 				user if possible.
-		In addition to the list of dicts, should return an "is_full" boolean which is True
+		In addition to the list of dicts, should return a list of worksheets fetched from,
-		if all rows were fetched or False if only some subset was fetched (eg. for quota management reasons).
+		which is then passed to row_was_expected().
-		Returns (is_full, rows).
+		Returns (worksheets, rows).
 		"""
 		raise NotImplementedError
 	def row_was_expected(self, db_row, worksheets):
 		"""Given a database row and list of worksheets from get_rows(), return whether
 		the given row should have been present in the returned rows, ie. if we expected
 		to find it on one of those worksheets."""
 		# Default to the common case, which is that we always return all data
 		# so the row should always be expected.
 		return True
 	def write_value(self, row, key, value):
 		"""Write key=value to the given row. Takes the full row object so any identifying info
 		can be read from it as needed."""
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@ -103,8 +103,7 @@ class SheetsMiddleware(Middleware):
 					self.write_id(row)
 				all_rows.append(row)
-		is_full = sorted(worksheets) == list(self.worksheets.keys())
+		return worksheets, all_rows
 		return is_full, all_rows
 	def row_is_non_empty(self, row):
 		"""Returns True if row is considered to be non-empty and should have an id assigned."""
@ -218,6 +217,11 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware):
 		"show_in_description": ENCODE_CHECKMARK,
 	}
 	def row_was_expected(self, db_row, worksheets):
 		# Database does not record a worksheet for playlists, we assume there's only one
 		# sheet and so it should always be there.
 		return True
 	def row_is_non_empty(self, row):
 		return row["tags"] is not None
@ -284,6 +288,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
 		bustime = common.dt_to_bustime(self.bustime_start, value)
 		return common.format_bustime(bustime, round="minute")
 	def row_was_expected(self, db_row, worksheets):
 		return db_row.sheet_name in worksheets
 	def row_is_non_empty(self, row):
 		return any(row[col] for col in ["event_start", "description"])
--- a/sheetsync/sheetsync/streamlog.py
+++ b/sheetsync/sheetsync/streamlog.py
@ -71,7 +71,7 @@ class StreamLogPlaylistsMiddleware(Middleware):
 				"first_event_id": None, # TODO missing in StreamLog
 				"last_event_id": None, # TODO missing in StreamLog
 			})
-		return True, rows
+		return None, rows
 	# writing intentionally not implemented
@ -124,8 +124,8 @@ class StreamLogEventsMiddleware(Middleware):
 			# Malformed rows can be skipped, represented as a None result
 			if row is not None:
 				all_rows.append(row)
-		# There's no worksheet concept here so we always return a full sync.
+		# There's no worksheet concept here so just return None for worksheets.
-		return True, all_rows
+		return None, all_rows
 	def parse_row(self, row):
 		output = {}