diff --git a/sheetsync/sheetsync/main.py b/sheetsync/sheetsync/main.py
index be9ae7d..e898872 100644
--- a/sheetsync/sheetsync/main.py
+++ b/sheetsync/sheetsync/main.py
@@ -112,15 +112,24 @@ class SheetSync(object):
             db_rows = self.get_db_rows()
             seen = set()
 
-            for row in self.middleware.get_rows():
+            is_full, sheet_rows = self.middleware.get_rows()
+            for row in sheet_rows:
                 if row['id'] in seen:
                     self.logger.error("Duplicate id {}, skipping".format(row['id']))
                     continue
                 seen.add(row['id'])
                 self.sync_row(row, db_rows.get(row['id']))
 
-            for db_row in [r for id, r in db_rows.items() if id not in seen]:
-                self.sync_row(None, db_row)
+            if is_full:
+                # Find rows that were not in the sheet.
+                # Only do this if we did a full sync, otherwise things might be missing
+                # simply because they're in a worksheet we didn't sync.
+                missing = [
+                    r for row_id, r in db_rows.items()
+                    if row_id not in seen
+                ]
+                for db_row in missing:
+                    self.sync_row(None, db_row)
 
         except Exception as e:
             # for HTTPErrors, http response body includes the more detailed error
diff --git a/sheetsync/sheetsync/sheets.py b/sheetsync/sheetsync/sheets.py
index 73cf25c..712b05d 100644
--- a/sheetsync/sheetsync/sheets.py
+++ b/sheetsync/sheetsync/sheets.py
@@ -149,7 +149,7 @@ class SheetsMiddleware():
         for quota limit reasons."""
         if self.sync_count % self.SYNCS_PER_INACTIVE_CHECK == 0:
             # check all worksheets
-            worksheets = self.worksheets
+            worksheets = list(self.worksheets.keys())
         else:
             # only check most recently changed worksheets
             worksheets = sorted(
@@ -160,10 +160,14 @@ class SheetsMiddleware():
         return worksheets
 
     def get_rows(self):
-        """Fetch all rows of worksheet, parsed into a list of dicts."""
+        """Fetch all rows of worksheet, parsed into a list of dicts.
+        Return (is_full, all rows).
+        """
         # Clear previously seen unassigned rows
         self.unassigned_rows = {}
-        for worksheet in self.pick_worksheets():
+        worksheets = self.pick_worksheets()
+        all_rows = []
+        for worksheet in worksheets:
             rows = self.client.get_rows(self.sheet_id, worksheet)
             for row_index, row in enumerate(rows):
                 # Skip first row (ie. the column titles).
@@ -200,7 +204,10 @@ class SheetsMiddleware():
                     self.write_value(row, "edit_link", edit_link)
                     self.mark_modified(row)
 
-                yield row
+                all_rows.append(row)
+        # Compare as sets: the picked worksheets and the dict keys may be ordered differently.
+        is_full = set(worksheets) == set(self.worksheets)
+        return is_full, all_rows
 
     def write_id(self, row):
         self.client.write_value(
diff --git a/sheetsync/sheetsync/streamlog.py b/sheetsync/sheetsync/streamlog.py
index 264303f..6ac0122 100644
--- a/sheetsync/sheetsync/streamlog.py
+++ b/sheetsync/sheetsync/streamlog.py
@@ -83,11 +83,14 @@ class StreamLogMiddleware:
         }
 
     def get_rows(self):
+        all_rows = []
         for row in self.client.get_rows()["event_log"]:
             row = self.parse_row(row)
             # Malformed rows can be skipped, represented as a None result
             if row is not None:
-                yield row
+                all_rows.append(row)
+        # There's no worksheet concept here so we always return a full sync.
+        return True, all_rows
 
     def parse_row(self, row):
         output = {}