sheetsync: Only consider a row missing if we did a full sync

This prevents rows from being spuriously treated as missing merely because they are in a worksheet we didn't sync.
1 year ago · bebce3df4c
parent 3cdd8f22ad
commit bebce3df4c
3 changed files with 26 additions and 8 deletions
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@ -112,14 +112,23 @@ class SheetSync(object):
 				db_rows = self.get_db_rows()
 				seen = set()
-				for row in self.middleware.get_rows():
+				is_full, sheet_rows = self.middleware.get_rows()
 				for row in sheet_rows:
 					if row['id'] in seen:
 						self.logger.error("Duplicate id {}, skipping".format(row['id']))
 						continue
 					seen.add(row['id'])
 					self.sync_row(row, db_rows.get(row['id']))
-				for db_row in [r for id, r in db_rows.items() if id not in seen]:
+				if is_full:
 					# Find rows that were not in the sheet.
 					# Only do this if we did a full sync, otherwise things might be missing
 					# simply because they're in a worksheet we didn't sync.
 					missing = [
 						r for id, r in db_rows.items()
 						if id not in seen
 					]
 					for db_row in missing:
 						self.sync_row(None, db_row)
 			except Exception as e:
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@ -149,7 +149,7 @@ class SheetsMiddleware():
 		for quota limit reasons."""
 		if self.sync_count % self.SYNCS_PER_INACTIVE_CHECK == 0:
 			# check all worksheets
-			worksheets = self.worksheets
+			worksheets = list(self.worksheets.keys())
 		else:
 			# only check most recently changed worksheets
 			worksheets = sorted(
@ -160,10 +160,14 @@ class SheetsMiddleware():
 		return worksheets
 	def get_rows(self):
-		"""Fetch all rows of worksheet, parsed into a list of dicts."""
+		"""Fetch all rows of worksheet, parsed into a list of dicts.
 		Return (is_full, all rows), where is_full is True only if every
 		worksheet in self.worksheets was fetched during this call.
 		"""
 		# Clear previously seen unassigned rows
 		self.unassigned_rows = {}
-		for worksheet in self.pick_worksheets():
+		worksheets = self.pick_worksheets()
 		all_rows = []
 		for worksheet in worksheets:
 			rows = self.client.get_rows(self.sheet_id, worksheet)
 			for row_index, row in enumerate(rows):
 				# Skip first row (ie. the column titles).
@ -200,7 +204,9 @@ class SheetsMiddleware():
 					self.write_value(row, "edit_link", edit_link)
 					self.mark_modified(row)
-				yield row
+				all_rows.append(row)
 		# Compare order-insensitively: sort both sides so a full sync is
 		# recognised regardless of the order of the keys in self.worksheets.
 		is_full = sorted(worksheets) == sorted(self.worksheets.keys())
 		return is_full, all_rows
 	def write_id(self, row):
 		self.client.write_value(
--- a/sheetsync/sheetsync/streamlog.py
+++ b/sheetsync/sheetsync/streamlog.py
@ -83,11 +83,14 @@ class StreamLogMiddleware:
 		}
 	def get_rows(self):
 		"""Fetch the "event_log" rows from the client and parse each with parse_row.
 		Rows that parse to None are dropped.
 		Return (is_full, all rows); is_full is always True for this middleware.
 		"""
 		all_rows = []
 		for row in self.client.get_rows()["event_log"]:
 			row = self.parse_row(row)
 			# Malformed rows can be skipped, represented as a None result
 			if row is not None:
-				yield row
+				all_rows.append(row)
 		# There's no worksheet concept here so we always return a full sync.
 		return True, all_rows
 	def parse_row(self, row):
 		output = {}