Implement reverse sync mode

This is a mode where all data flows one-way from the database to the sheet.
It is intended to be used to populate an empty sheet from database events,
possibly sourced from somewhere else.

To make this work, a few changes were required:
* Track which ids we've seen so we know what events were not matched with a row
* Allow `row` to be None in `sync_row`
* When it is, call the middleware to create a new row with a new id
* In sheets, this is implemented by tracking the last empty rows we saw, and claiming them as needed.
pull/401/head
Mike Lang 1 year ago committed by Mike Lang
parent 85de9757f7
commit 74869de89d

@ -69,10 +69,11 @@ class SheetSync(object):
# Time to wait after getting an error
ERROR_RETRY_INTERVAL = 10
def __init__(self, middleware, stop, dbmanager):
def __init__(self, middleware, stop, dbmanager, reverse_sync=False):
self.middleware = middleware
self.stop = stop
self.dbmanager = dbmanager
self.create_missing_ids = False
# List of input columns
self.input_columns = [
'event_start',
@ -91,6 +92,14 @@ class SheetSync(object):
'state',
'error',
]
if reverse_sync:
# Reverse Sync refers to copying all event data from the database into the sheet,
# instead of it (mostly) being the other way. In particular:
# - All columns become output columns
# - We are allowed to create new sheet rows for database events if they don't exist.
self.create_missing_ids = True
self.output_columns += self.input_columns
self.input_columns = []
def run(self):
self.conn = self.dbmanager.get_conn()
@ -103,10 +112,18 @@ class SheetSync(object):
# each row is more expensive than the cost of just grabbing the entire table
# and comparing locally.
events = self.get_events()
seen = set()
for row in self.middleware.get_rows():
if row['id'] in seen:
logging.error("Duplicate id {}, skipping".format(row['id']))
continue
seen.add(row['id'])
self.sync_row(row, events.get(row['id']))
# Any event whose id was never seen among the sheet rows has no matching row.
# Collect them first, then sync each one with row=None.
unmatched_events = [
    event for event_id, event in events.items() if event_id not in seen
]
for event in unmatched_events:
    # sync_row takes exactly (row, event); passing None for row tells it
    # that there is no sheet row for this event.
    self.sync_row(None, event)
except Exception as e:
# for HTTPErrors, http response body includes the more detailed error
detail = ''
@ -151,11 +168,14 @@ class SheetSync(object):
return by_id
def sync_row(self, row, event):
"""Take a row dict and an Event from the database (or None if id not found)
and take whatever action is required to sync them, ie. writing to the database or sheet."""
worksheet = row["sheet_name"]
"""Take a row dict (or None) and an Event from the database (or None)
and take whatever action is required to sync them, ie. writing to the database or sheet.
At least one of row and event must be non-None.
"""
if event is None:
assert row
worksheet = row["sheet_name"]
# No event currently in DB, create it.
logging.info("Inserting new event {}".format(row['id']))
# Insertion conflict just means that another sheet sync beat us to the insert.
@ -175,6 +195,15 @@ class SheetSync(object):
self.middleware.mark_modified(worksheet)
return
if row is None:
assert event
if not self.create_missing_ids:
logging.info("Skipping event {} without any matching row".format(event["id"]))
return
logging.info("Adding new row {}".format(event["id"]))
row = self.middleware.create_row(event["sheet_name"], event["id"])
worksheet = row["sheet_name"]
rows_found.labels(worksheet).inc()
# If no database error, but we have parse errors, indicate they should be displayed.
@ -188,7 +217,7 @@ class SheetSync(object):
event = event._replace(state='UNLISTED')
# Update database with any changed inputs
changed = [col for col in self.input_columns if row[col] != getattr(event, col)]
changed = [col for col in self.input_columns if row.get(col) != getattr(event, col)]
if changed:
logging.info("Updating event {} with new value(s) for {}".format(
row['id'], ', '.join(changed)
@ -208,7 +237,7 @@ class SheetSync(object):
# Update sheet with any changed outputs
format_output = lambda v: '' if v is None else v # cast nulls to empty string
changed = [col for col in self.output_columns if row[col] != format_output(getattr(event, col))]
changed = [col for col in self.output_columns if row.get(col) != format_output(getattr(event, col))]
if changed:
logging.info("Updating sheet row {} with new value(s) for {}".format(
row['id'], ', '.join(changed)
@ -324,12 +353,15 @@ class PlaylistSync:
@argh.arg('--playlist-worksheet', help=
"An optional additional worksheet name that holds playlist tag definitions",
)
def main(dbconnect, sheets_creds_file, edit_url, bustime_start, sheet_id, worksheet_names, metrics_port=8005, backdoor_port=0, allocate_ids=False, playlist_worksheet=None):
@argh.arg('--reverse-sync', help=
"Enables an alternate mode where all data is synced from the database to the sheet",
)
def main(dbconnect, sheets_creds_file, edit_url, bustime_start, sheet_id, worksheet_names, metrics_port=8005, backdoor_port=0, allocate_ids=False, playlist_worksheet=None, reverse_sync=False):
"""
Sheet sync constantly scans a Google Sheets sheet and a database, copying inputs from the sheet
to the DB and outputs from the DB to the sheet.
With the exception of id allocation, all operations are idempotent and multiple sheet syncs
With the exception of id allocation or reverse sync mode, all operations are idempotent and multiple sheet syncs
may be run for redundancy.
"""
common.PromLogCountsHandler.install()
@ -369,7 +401,7 @@ def main(dbconnect, sheets_creds_file, edit_url, bustime_start, sheet_id, worksh
sheets_middleware = SheetsMiddleware(sheets_client, sheet_id, worksheet_names, bustime_start, edit_url, allocate_ids)
workers = [
SheetSync(sheets_middleware, stop, dbmanager),
SheetSync(sheets_middleware, stop, dbmanager, reverse_sync),
]
if playlist_worksheet:
workers.append(PlaylistSync(stop, dbmanager, sheets_client, sheet_id, playlist_worksheet))

@ -155,6 +155,8 @@ class SheetsMiddleware():
def get_rows(self):
"""Fetch all rows of worksheet, parsed into a list of dicts."""
# Clear previously seen unassigned rows
self.unassigned_rows = {}
for worksheet in self.pick_worksheets():
rows = self.sheets.get_rows(self.sheet_id, worksheet)
for row_index, row in enumerate(rows):
@ -167,10 +169,11 @@ class SheetsMiddleware():
# Handle rows without an allocated id
if row['id'] is None:
# If a row is all empty (including no id), ignore it.
# If a row is all empty (including no id), ignore it and mark it down for possible use in create_row().
# Ignore the tags column for this check since it is never non-empty due to implicit tags
# (and even if there's other tags, we don't care if there's nothing else in the row).
if not any(row[col] for col in self.input_columns if col != 'tags'):
self.unassigned_rows.setdefault(worksheet, []).append(row["index"])
continue
# If we can't allocate ids, warn and ignore.
if not self.allocate_ids:
@ -179,11 +182,7 @@ class SheetsMiddleware():
# Otherwise, allocate id for a new row.
row['id'] = str(uuid.uuid4())
logging.info(f"Allocating id for row {worksheet!r}:{row['index']} = {row['id']}")
self.sheets.write_value(
self.sheet_id, worksheet,
row["index"], self.column_map['id'],
str(row['id']),
)
self.write_id(row)
# Set edit link if marked for editing and start/end set.
# This prevents accidents / clicking the wrong row and provides
@ -199,6 +198,13 @@ class SheetsMiddleware():
yield row
def write_id(self, row):
    """Write the row's id into the sheet cell for that row.

    Reads "sheet_name", "index" and "id" from the row dict. The target column
    is whatever `column_map` maps 'id' to, and the id is converted to a string
    before being written.
    """
    target_column = self.column_map['id']
    id_text = str(row['id'])
    self.sheets.write_value(
        self.sheet_id,
        row["sheet_name"],
        row["index"],
        target_column,
        id_text,
    )
def parse_row(self, worksheet, row_index, row):
"""Take a row as a sequence of columns, and return a dict {column: value}"""
row_dict = {'_parse_errors': []}
@ -247,3 +253,14 @@ class SheetsMiddleware():
"""Mark row as having had a change made, bumping its worksheet to the top of
the most-recently-modified queue."""
self.worksheets[row["sheet_name"]] = monotonic()
def create_row(self, worksheet, id):
    """Claim a previously recorded empty row of `worksheet` for an existing id.

    Removes the first index from this worksheet's entry in `unassigned_rows`,
    writes the id to that row via write_id(), and returns a minimal row dict
    holding only "sheet_name", "id" and "index".

    Raises KeyError if no empty rows were recorded for the worksheet, or
    IndexError if its list of empty rows has been used up.
    """
    free_indexes = self.unassigned_rows[worksheet]
    row = dict(sheet_name=worksheet, id=id, index=free_indexes.pop(0))
    logging.info(f"Assigning existing id {row['id']} to empty row {worksheet!r}:{row['index']}")
    self.write_id(row)
    return row

@ -104,3 +104,6 @@ class StreamLogMiddleware:
def mark_modified(self, row):
    """Do nothing: marking something as modified is not a concept this middleware has."""
    return None
def create_row(self, worksheet, id):
    """Not supported by this middleware: always raises NotImplementedError."""
    raise NotImplementedError()

Loading…
Cancel
Save