diff --git a/sheetsync/sheetsync/main.py b/sheetsync/sheetsync/main.py index 0d4b889..ed5a80d 100644 --- a/sheetsync/sheetsync/main.py +++ b/sheetsync/sheetsync/main.py @@ -112,7 +112,7 @@ class SheetSync(object): db_rows = self.get_db_rows() seen = set() - is_full, sheet_rows = self.middleware.get_rows() + worksheets, sheet_rows = self.middleware.get_rows() for row in sheet_rows: if row['id'] in seen: self.logger.error("Duplicate id {}, skipping".format(row['id'])) @@ -120,16 +120,14 @@ class SheetSync(object): seen.add(row['id']) self.sync_row(row, db_rows.get(row['id'])) - if is_full: - # Find rows that were not in the sheet. - # Only do this if we did a full sync, otherwise things might be missing - # simply because they're in a worksheet we didn't sync. - missing = [ - r for id, r in db_rows.items() - if id not in seen - ] - for db_row in missing: - self.sync_row(None, db_row) + # Find rows that were not in the sheet, that were expected to be in that sheet. + missing = [ + r for id, r in db_rows.items() + if id not in seen + and self.middleware.row_was_expected(r, worksheets) + ] + for db_row in missing: + self.sync_row(None, db_row) except Exception as e: # for HTTPErrors, http response body includes the more detailed error diff --git a/sheetsync/sheetsync/middleware.py b/sheetsync/sheetsync/middleware.py index bc65303..4797841 100644 --- a/sheetsync/sheetsync/middleware.py +++ b/sheetsync/sheetsync/middleware.py @@ -13,12 +13,20 @@ class Middleware: is still required. _parse_errors: A list of error messages encountered when parsing, to be surfaced to the user if possible. - In addition to the list of dicts, should return an "is_full" boolean which is True - if all rows were fetched or False if only some subset was fetched (eg. for quota management reasons). - Returns (is_full, rows). + In addition to the list of dicts, should return a list of worksheets fetched from, + which is then passed to row_was_expected(). + Returns (worksheets, rows). """ raise NotImplementedError + def row_was_expected(self, db_row, worksheets): + """Given a database row and list of worksheets from get_rows(), return whether + the given row should have been present in the returned rows, ie. if we expected + to find it on one of those worksheets.""" + # Default to the common case, which is that we always return all data + # so the row should always be expected. + return True + def write_value(self, row, key, value): """Write key=value to the given row. Takes the full row object so any identifying info can be read from it as needed.""" diff --git a/sheetsync/sheetsync/sheets.py b/sheetsync/sheetsync/sheets.py index 4291b73..ef8733e 100644 --- a/sheetsync/sheetsync/sheets.py +++ b/sheetsync/sheetsync/sheets.py @@ -103,8 +103,7 @@ class SheetsMiddleware(Middleware): self.write_id(row) all_rows.append(row) - is_full = sorted(worksheets) == list(self.worksheets.keys()) - return is_full, all_rows + return worksheets, all_rows def row_is_non_empty(self, row): """Returns True if row is considered to be non-empty and should have an id assigned.""" @@ -218,6 +217,11 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware): "show_in_description": ENCODE_CHECKMARK, } + def row_was_expected(self, db_row, worksheets): + # Database does not record a worksheet for playlists, we assume there's only one + # sheet and so it should always be there. + return True + def row_is_non_empty(self, row): return row["tags"] is not None @@ -284,6 +288,9 @@ class SheetsEventsMiddleware(SheetsMiddleware): bustime = common.dt_to_bustime(self.bustime_start, value) return common.format_bustime(bustime, round="minute") + def row_was_expected(self, db_row, worksheets): + return db_row.sheet_name in worksheets + def row_is_non_empty(self, row): return any(row[col] for col in ["event_start", "description"]) diff --git a/sheetsync/sheetsync/streamlog.py b/sheetsync/sheetsync/streamlog.py index 9db2eb7..928a8f6 100644 --- a/sheetsync/sheetsync/streamlog.py +++ b/sheetsync/sheetsync/streamlog.py @@ -71,7 +71,7 @@ class StreamLogPlaylistsMiddleware(Middleware): "first_event_id": None, # TODO missing in StreamLog "last_event_id": None, # TODO missing in StreamLog }) - return True, rows + return None, rows # writing intentionally not implemented @@ -124,8 +124,8 @@ class StreamLogEventsMiddleware(Middleware): # Malformed rows can be skipped, represented as a None result if row is not None: all_rows.append(row) - # There's no worksheet concept here so we always return a full sync. - return True, all_rows + # There's no worksheet concept here so just return None for worksheets. + return None, all_rows def parse_row(self, row): output = {}