sheetsync: Only consider a row missing if we did a full sync

This prevents database rows from being spuriously treated as missing merely because they live in a worksheet that was not part of this (partial) sync.
pull/401/head
Mike Lang 4 months ago committed by Mike Lang
parent 3cdd8f22ad
commit bebce3df4c

@ -112,15 +112,24 @@ class SheetSync(object):
db_rows = self.get_db_rows()
seen = set()
for row in self.middleware.get_rows():
is_full, sheet_rows = self.middleware.get_rows()
for row in sheet_rows:
if row['id'] in seen:
self.logger.error("Duplicate id {}, skipping".format(row['id']))
continue
seen.add(row['id'])
self.sync_row(row, db_rows.get(row['id']))
for db_row in [r for id, r in db_rows.items() if id not in seen]:
self.sync_row(None, db_row)
if is_full:
# Find rows that were not in the sheet.
# Only do this if we did a full sync, otherwise things might be missing
# simply because they're in a worksheet we didn't sync.
missing = [
r for id, r in db_rows.items()
if id not in seen
]
for db_row in missing:
self.sync_row(None, db_row)
except Exception as e:
# for HTTPErrors, http response body includes the more detailed error

@ -149,7 +149,7 @@ class SheetsMiddleware():
for quota limit reasons."""
if self.sync_count % self.SYNCS_PER_INACTIVE_CHECK == 0:
# check all worksheets
worksheets = self.worksheets
worksheets = list(self.worksheets.keys())
else:
# only check most recently changed worksheets
worksheets = sorted(
@ -160,10 +160,14 @@ class SheetsMiddleware():
return worksheets
def get_rows(self):
"""Fetch all rows of worksheet, parsed into a list of dicts."""
"""Fetch all rows of worksheet, parsed into a list of dicts.
Return (is_full, all rows).
"""
# Clear previously seen unassigned rows
self.unassigned_rows = {}
for worksheet in self.pick_worksheets():
worksheets = self.pick_worksheets()
all_rows = []
for worksheet in worksheets:
rows = self.client.get_rows(self.sheet_id, worksheet)
for row_index, row in enumerate(rows):
# Skip first row (ie. the column titles).
@ -200,7 +204,9 @@ class SheetsMiddleware():
self.write_value(row, "edit_link", edit_link)
self.mark_modified(row)
yield row
all_rows.append(row)
is_full = sorted(worksheets) == list(self.worksheets.keys()):
return is_full, all_rows
def write_id(self, row):
self.client.write_value(

@ -83,11 +83,14 @@ class StreamLogMiddleware:
}
def get_rows(self):
    """Fetch every row of the event log, parsed.

    Returns a tuple ``(is_full, rows)``:

    * ``is_full`` is always True, as there are no worksheets here that
      could be left out of a sync.
    * ``rows`` is a list of the results of ``self.parse_row`` for each
      entry under the client's ``"event_log"`` key, with ``None`` results
      (malformed rows) left out.

    This must be a plain function rather than a generator: a ``yield``
    anywhere in the body would turn the final ``return`` into the
    generator's stop value and callers could no longer unpack the tuple.
    """
    all_rows = []
    for raw_row in self.client.get_rows()["event_log"]:
        row = self.parse_row(raw_row)
        # Malformed rows can be skipped, represented as a None result
        if row is not None:
            all_rows.append(row)
    # There's no worksheet concept here so we always return a full sync.
    return True, all_rows
def parse_row(self, row):
output = {}

Loading…
Cancel
Save