sheetsync: Deal with reverse syncing properly when not all events are in the list of worksheets

This is important because archive events should not be reverse-synced.
We only want to create new rows when the row's intended worksheet is in the list of worksheets we sync.
pull/401/head
Mike Lang 3 months ago committed by Mike Lang
parent 96181fd875
commit eeffeeed10

@ -112,7 +112,7 @@ class SheetSync(object):
db_rows = self.get_db_rows() db_rows = self.get_db_rows()
seen = set() seen = set()
is_full, sheet_rows = self.middleware.get_rows() worksheets, sheet_rows = self.middleware.get_rows()
for row in sheet_rows: for row in sheet_rows:
if row['id'] in seen: if row['id'] in seen:
self.logger.error("Duplicate id {}, skipping".format(row['id'])) self.logger.error("Duplicate id {}, skipping".format(row['id']))
@ -120,16 +120,14 @@ class SheetSync(object):
seen.add(row['id']) seen.add(row['id'])
self.sync_row(row, db_rows.get(row['id'])) self.sync_row(row, db_rows.get(row['id']))
if is_full: # Find rows that were not in the sheet, that were expected to be in that sheet.
# Find rows that were not in the sheet. missing = [
# Only do this if we did a full sync, otherwise things might be missing r for id, r in db_rows.items()
# simply because they're in a worksheet we didn't sync. if id not in seen
missing = [ and self.middleware.row_was_expected(r, worksheets)
r for id, r in db_rows.items() ]
if id not in seen for db_row in missing:
] self.sync_row(None, db_row)
for db_row in missing:
self.sync_row(None, db_row)
except Exception as e: except Exception as e:
# for HTTPErrors, http response body includes the more detailed error # for HTTPErrors, http response body includes the more detailed error

@ -13,12 +13,20 @@ class Middleware:
is still required. is still required.
_parse_errors: A list of error messages encountered when parsing, to be surfaced to the _parse_errors: A list of error messages encountered when parsing, to be surfaced to the
user if possible. user if possible.
In addition to the list of dicts, should return an "is_full" boolean which is True In addition to the list of dicts, should return a list of worksheets fetched from,
if all rows were fetched or False if only some subset was fetched (eg. for quota management reasons). which is then passed to row_was_expected().
Returns (is_full, rows). Returns (worksheets, rows).
""" """
raise NotImplementedError raise NotImplementedError
def row_was_expected(self, db_row, worksheets):
"""Given a database row and list of worksheets from get_rows(), return whether
the given row should have been present in the returned rows, ie. if we expected
to find it on one of those worksheets."""
# Default to the common case, which is that we always return all data
# so the row should always be expected.
return True
def write_value(self, row, key, value): def write_value(self, row, key, value):
"""Write key=value to the given row. Takes the full row object so any identifying info """Write key=value to the given row. Takes the full row object so any identifying info
can be read from it as needed.""" can be read from it as needed."""

@ -103,8 +103,7 @@ class SheetsMiddleware(Middleware):
self.write_id(row) self.write_id(row)
all_rows.append(row) all_rows.append(row)
is_full = sorted(worksheets) == list(self.worksheets.keys()) return worksheets, all_rows
return is_full, all_rows
def row_is_non_empty(self, row): def row_is_non_empty(self, row):
"""Returns True if row is considered to be non-empty and should have an id assigned.""" """Returns True if row is considered to be non-empty and should have an id assigned."""
@ -218,6 +217,11 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware):
"show_in_description": ENCODE_CHECKMARK, "show_in_description": ENCODE_CHECKMARK,
} }
def row_was_expected(self, db_row, worksheets):
# Database does not record a worksheet for playlists, we assume there's only one
# sheet and so it should always be there.
return True
def row_is_non_empty(self, row): def row_is_non_empty(self, row):
return row["tags"] is not None return row["tags"] is not None
@ -284,6 +288,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
bustime = common.dt_to_bustime(self.bustime_start, value) bustime = common.dt_to_bustime(self.bustime_start, value)
return common.format_bustime(bustime, round="minute") return common.format_bustime(bustime, round="minute")
def row_was_expected(self, db_row, worksheets):
return db_row.sheet_name in worksheets
def row_is_non_empty(self, row): def row_is_non_empty(self, row):
return any(row[col] for col in ["event_start", "description"]) return any(row[col] for col in ["event_start", "description"])

@ -71,7 +71,7 @@ class StreamLogPlaylistsMiddleware(Middleware):
"first_event_id": None, # TODO missing in StreamLog "first_event_id": None, # TODO missing in StreamLog
"last_event_id": None, # TODO missing in StreamLog "last_event_id": None, # TODO missing in StreamLog
}) })
return True, rows return None, rows
# writing intentionally not implemented # writing intentionally not implemented
@ -124,8 +124,8 @@ class StreamLogEventsMiddleware(Middleware):
# Malformed rows can be skipped, represented as a None result # Malformed rows can be skipped, represented as a None result
if row is not None: if row is not None:
all_rows.append(row) all_rows.append(row)
# There's no worksheet concept here so we always return a full sync. # There's no worksheet concept here so just return None for worksheets.
return True, all_rows return None, all_rows
def parse_row(self, row): def parse_row(self, row):
output = {} output = {}

Loading…
Cancel
Save