sheetsync: Only consider a row missing if we did a full sync

This prevents rows from being spuriously treated as missing just because they are in a worksheet we didn't sync this time.
pull/401/head
Mike Lang 3 months ago committed by Mike Lang
parent 3cdd8f22ad
commit bebce3df4c

@ -112,14 +112,23 @@ class SheetSync(object):
db_rows = self.get_db_rows() db_rows = self.get_db_rows()
seen = set() seen = set()
for row in self.middleware.get_rows(): is_full, sheet_rows = self.middleware.get_rows()
for row in sheet_rows:
if row['id'] in seen: if row['id'] in seen:
self.logger.error("Duplicate id {}, skipping".format(row['id'])) self.logger.error("Duplicate id {}, skipping".format(row['id']))
continue continue
seen.add(row['id']) seen.add(row['id'])
self.sync_row(row, db_rows.get(row['id'])) self.sync_row(row, db_rows.get(row['id']))
for db_row in [r for id, r in db_rows.items() if id not in seen]: if is_full:
# Find rows that were not in the sheet.
# Only do this if we did a full sync, otherwise things might be missing
# simply because they're in a worksheet we didn't sync.
missing = [
r for id, r in db_rows.items()
if id not in seen
]
for db_row in missing:
self.sync_row(None, db_row) self.sync_row(None, db_row)
except Exception as e: except Exception as e:

@ -149,7 +149,7 @@ class SheetsMiddleware():
for quota limit reasons.""" for quota limit reasons."""
if self.sync_count % self.SYNCS_PER_INACTIVE_CHECK == 0: if self.sync_count % self.SYNCS_PER_INACTIVE_CHECK == 0:
# check all worksheets # check all worksheets
worksheets = self.worksheets worksheets = list(self.worksheets.keys())
else: else:
# only check most recently changed worksheets # only check most recently changed worksheets
worksheets = sorted( worksheets = sorted(
@ -160,10 +160,14 @@ class SheetsMiddleware():
return worksheets return worksheets
def get_rows(self): def get_rows(self):
"""Fetch all rows of worksheet, parsed into a list of dicts.""" """Fetch all rows of worksheet, parsed into a list of dicts.
Return (is_full, all_rows), where is_full is True only if every configured worksheet was fetched.
"""
# Clear previously seen unassigned rows # Clear previously seen unassigned rows
self.unassigned_rows = {} self.unassigned_rows = {}
for worksheet in self.pick_worksheets(): worksheets = self.pick_worksheets()
all_rows = []
for worksheet in worksheets:
rows = self.client.get_rows(self.sheet_id, worksheet) rows = self.client.get_rows(self.sheet_id, worksheet)
for row_index, row in enumerate(rows): for row_index, row in enumerate(rows):
# Skip first row (ie. the column titles). # Skip first row (ie. the column titles).
@ -200,7 +204,9 @@ class SheetsMiddleware():
self.write_value(row, "edit_link", edit_link) self.write_value(row, "edit_link", edit_link)
self.mark_modified(row) self.mark_modified(row)
yield row all_rows.append(row)
is_full = sorted(worksheets) == sorted(self.worksheets.keys())
return is_full, all_rows
def write_id(self, row): def write_id(self, row):
self.client.write_value( self.client.write_value(

@ -83,11 +83,14 @@ class StreamLogMiddleware:
} }
def get_rows(self): def get_rows(self):
all_rows = []
for row in self.client.get_rows()["event_log"]: for row in self.client.get_rows()["event_log"]:
row = self.parse_row(row) row = self.parse_row(row)
# Malformed rows can be skipped, represented as a None result # Malformed rows can be skipped, represented as a None result
if row is not None: if row is not None:
yield row all_rows.append(row)
# There's no worksheet concept here so we always return a full sync.
return True, all_rows
def parse_row(self, row): def parse_row(self, row):
output = {} output = {}

Loading…
Cancel
Save