sheetsync: Deal with reverse syncing properly when not all events are in the list of worksheets

This is important because archive events should not be reversed.
We only want to create new rows when the row's intended worksheet is in our list of worksheets we sync.
pull/401/head
Mike Lang 4 months ago committed by Mike Lang
parent 96181fd875
commit eeffeeed10

@ -112,7 +112,7 @@ class SheetSync(object):
db_rows = self.get_db_rows()
seen = set()
is_full, sheet_rows = self.middleware.get_rows()
worksheets, sheet_rows = self.middleware.get_rows()
for row in sheet_rows:
if row['id'] in seen:
self.logger.error("Duplicate id {}, skipping".format(row['id']))
@ -120,16 +120,14 @@ class SheetSync(object):
seen.add(row['id'])
self.sync_row(row, db_rows.get(row['id']))
if is_full:
# Find rows that were not in the sheet.
# Only do this if we did a full sync, otherwise things might be missing
# simply because they're in a worksheet we didn't sync.
missing = [
r for id, r in db_rows.items()
if id not in seen
]
for db_row in missing:
self.sync_row(None, db_row)
# Find rows that were not in the sheet, that were expected to be in that sheet.
missing = [
r for id, r in db_rows.items()
if id not in seen
and self.middleware.row_was_expected(r, worksheets)
]
for db_row in missing:
self.sync_row(None, db_row)
except Exception as e:
# for HTTPErrors, http response body includes the more detailed error

@ -13,12 +13,20 @@ class Middleware:
is still required.
_parse_errors: A list of error messages encountered when parsing, to be surfaced to the
user if possible.
In addition to the list of dicts, should return an "is_full" boolean which is True
if all rows were fetched or False if only some subset was fetched (eg. for quota management reasons).
Returns (is_full, rows).
In addition to the list of dicts, should return a list of worksheets fetched from,
which is then passed to row_was_expected().
Returns (worksheets, rows).
"""
raise NotImplementedError
def row_was_expected(self, db_row, worksheets):
"""Given a database row and list of worksheets from get_rows(), return whether
the given row should have been present in the returned rows, ie. if we expected
to find it on one of those worksheets."""
# Default to the common case, which is that we always return all data
# so the row should always be expected.
return True
def write_value(self, row, key, value):
"""Write key=value to the given row. Takes the full row object so any identifying info
can be read from it as needed."""

@ -103,8 +103,7 @@ class SheetsMiddleware(Middleware):
self.write_id(row)
all_rows.append(row)
is_full = sorted(worksheets) == list(self.worksheets.keys())
return is_full, all_rows
return worksheets, all_rows
def row_is_non_empty(self, row):
"""Returns True if row is considered to be non-empty and should have an id assigned."""
@ -218,6 +217,11 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware):
"show_in_description": ENCODE_CHECKMARK,
}
def row_was_expected(self, db_row, worksheets):
# Database does not record a worksheet for playlists, we assume there's only one
# sheet and so it should always be there.
return True
def row_is_non_empty(self, row):
return row["tags"] is not None
@ -284,6 +288,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
bustime = common.dt_to_bustime(self.bustime_start, value)
return common.format_bustime(bustime, round="minute")
def row_was_expected(self, db_row, worksheets):
return db_row.sheet_name in worksheets
def row_is_non_empty(self, row):
return any(row[col] for col in ["event_start", "description"])

@ -71,7 +71,7 @@ class StreamLogPlaylistsMiddleware(Middleware):
"first_event_id": None, # TODO missing in StreamLog
"last_event_id": None, # TODO missing in StreamLog
})
return True, rows
return None, rows
# writing intentionally not implemented
@ -124,8 +124,8 @@ class StreamLogEventsMiddleware(Middleware):
# Malformed rows can be skipped, represented as a None result
if row is not None:
all_rows.append(row)
# There's no worksheet concept here so we always return a full sync.
return True, all_rows
# There's no worksheet concept here so just return None for worksheets.
return None, all_rows
def parse_row(self, row):
output = {}

Loading…
Cancel
Save