From 367e6a7a7a10f805131729bf947cadc76cd5f709 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 17 Aug 2024 11:26:28 +1000 Subject: [PATCH] sheetsync fixes typos and omissions --- sheetsync/sheetsync/main.py | 29 +++++++++-------- sheetsync/sheetsync/middleware.py | 4 +-- sheetsync/sheetsync/sheets.py | 53 ++++++++++++++++++++++++++----- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/sheetsync/sheetsync/main.py b/sheetsync/sheetsync/main.py index ed5a80d..ef88e35 100644 --- a/sheetsync/sheetsync/main.py +++ b/sheetsync/sheetsync/main.py @@ -159,7 +159,7 @@ class SheetSync(object): | self.output_columns | self.metrics_columns ), - sql.Identifier("table"), + sql.Identifier(self.table), ) result = query(self.conn, built_query) by_id = {} @@ -207,7 +207,7 @@ class SheetSync(object): self.logger.info("Skipping db row {} without any matching sheet row".format(db_row.id)) return self.logger.info("Adding new row {}".format(db_row.id)) - sheet_row = self.middleware.create_row(db_row.sheet_name, db_row.id) + sheet_row = self.middleware.create_row(db_row) worksheet = sheet_row["sheet_name"] rows_found.labels(self.name, worksheet).inc() @@ -222,12 +222,11 @@ class SheetSync(object): UPDATE {} SET {} WHERE id = %(id)s - """).format(sql.SQL(", ").join( - [sql.Identifer(self.table)] + - [sql.SQL("{} = {}").format( + """).format(sql.Identifier(self.table), sql.SQL(", ").join([ + sql.SQL("{} = {}").format( sql.Identifier(col), get_column_placeholder(col) - ) for col in changed] - )) + ) for col in changed + ])) query(self.conn, built_query, **sheet_row) rows_changed.labels(self.name, 'input', worksheet).inc() self.middleware.mark_modified(sheet_row) @@ -250,7 +249,7 @@ class SheetSync(object): class EventsSync(SheetSync): table = "events" input_columns = { - 'sheet_name' + 'sheet_name', 'event_start', 'event_end', 'category', @@ -411,19 +410,22 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0): workers = [] for config in sync_configs: + reverse_sync = config.get("reverse_sync", False) if config["backend"] == "sheets": + allocate_ids = config.get("allocate_ids", False) + if allocate_ids and reverse_sync: + raise ValueError("Cannot combine allocate_ids and reverse_sync") creds = json.load(open(config["creds"])) client = SheetsClient( client_id=creds['client_id'], client_secret=creds['client_secret'], refresh_token=creds['refresh_token'], ) - allocate_ids = config.get("allocate_ids", False) - if config["type"] in ("sheets", "archive"): + if config["type"] in ("events", "archive"): middleware_cls = { - "sheets": SheetsEventsMiddleware, + "events": SheetsEventsMiddleware, "archive": SheetsArchiveMiddleware, - } + }[config["type"]] middleware = middleware_cls( client, config["sheet_id"], @@ -436,7 +438,7 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0): middleware = SheetsPlaylistsMiddleware( client, config["sheet_id"], - [config["playlist_worksheet"]], + config["worksheets"], config.get("allocate_ids", False), ) else: @@ -464,7 +466,6 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0): "playlists": PlaylistsSync, "archive": ArchiveSync, }[config["type"]] - reverse_sync = config.get("reverse_sync", False) sync = sync_class(config["name"], middleware, stop, dbmanager, reverse_sync) workers.append(sync) diff --git a/sheetsync/sheetsync/middleware.py b/sheetsync/sheetsync/middleware.py index 4797841..4a5a011 100644 --- a/sheetsync/sheetsync/middleware.py +++ b/sheetsync/sheetsync/middleware.py @@ -37,7 +37,7 @@ class Middleware: Intended as a way to keep track of recently-changed rows for quota optimization.""" pass - def create_row(self, worksheet, id): - """Create a new row with given id in the given worksheet and return it. + def create_row(self, row): + """Create a new blank row with id and worksheet determined from the given db row. Only used for reverse sync.""" raise NotImplementedError diff --git a/sheetsync/sheetsync/sheets.py b/sheetsync/sheetsync/sheets.py index ef8733e..dffc398 100644 --- a/sheetsync/sheetsync/sheets.py +++ b/sheetsync/sheetsync/sheets.py @@ -86,6 +86,8 @@ class SheetsMiddleware(Middleware): if row_index < self.header_rows: continue row = self.parse_row(worksheet, row_index, row) + if row is None: + continue # Handle rows without an allocated id if row['id'] is None: @@ -117,7 +119,8 @@ class SheetsMiddleware(Middleware): ) def parse_row(self, worksheet, row_index, row): - """Take a row as a sequence of columns, and return a dict {column: value}""" + """Take a row as a sequence of columns, and return a dict {column: value}. + May return None to skip the row (used by subclasses).""" row_dict = { "sheet_name": worksheet, "index": row_index, @@ -154,7 +157,9 @@ class SheetsMiddleware(Middleware): the most-recently-modified queue.""" self.worksheets[row["sheet_name"]] = monotonic() - def create_row(self, worksheet, id): + def _create_row(self, worksheet, id): + """Because the way we get the worksheet name differs for events vs playlists, + we have the common code here and defer extracting the worksheet and id to per-type implementations""" unassigned_rows = self.unassigned_rows.get(worksheet, []) if not unassigned_rows: raise Exception(f"Worksheet {worksheet} has no available space to create a new row in, or it wasn't fetched") @@ -185,6 +190,8 @@ def check_playlist(playlist_id): class SheetsPlaylistsMiddleware(SheetsMiddleware): + header_rows = 2 + column_map = { "tags": 0, "description": 1, @@ -206,6 +213,10 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware): ), "playlist_id": check_playlist, "show_in_description": PARSE_CHECKMARK, + "first_event_id": EMPTY_IS_NONE, + "last_event_id": EMPTY_IS_NONE, + "error": EMPTY_IS_NONE, + "id": EMPTY_IS_NONE, } column_encode = { @@ -217,13 +228,27 @@ class SheetsPlaylistsMiddleware(SheetsMiddleware): "show_in_description": ENCODE_CHECKMARK, } + def parse_row(self, worksheet, row_index, row): + row = super().parse_row(worksheet, row_index, row) + if row["id"] == "": + # Special case, row is marked to be ignored + return None + return row + def row_was_expected(self, db_row, worksheets): # Database does not record a worksheet for playlists, we assume there's only one # sheet and so it should always be there. return True def row_is_non_empty(self, row): - return row["tags"] is not None + return row["tags"] is not None or any( + row[key] for key in ("description", "name", "playlist_id") + ) + + def create_row(self, row): + # Always create in the first worksheet. We should only have one anyway. + worksheet = list(self.worksheets.keys())[0] + return self._create_row(worksheet, row.id) class SheetsEventsMiddleware(SheetsMiddleware): @@ -321,20 +346,32 @@ class SheetsEventsMiddleware(SheetsMiddleware): # Also clear it if it shouldn't be set. # We do this here instead of in sync_row() because it's Sheets-specific logic # that doesn't depend on the DB event in any way. - edit_link = self.edit_url.format(row['id']) if self.show_edit_url(row) else '' - if row['edit_link'] != edit_link: - logging.info("Updating sheet row {} with edit link {}".format(row['id'], edit_link)) - self.write_value(row, "edit_link", edit_link) - self.mark_modified(row) + edit_link = self.edit_url.format(row_dict['id']) if self.show_edit_url(row_dict) else '' + if row_dict['edit_link'] != edit_link: + logging.info("Updating sheet row {} with edit link {}".format(row_dict['id'], edit_link)) + self.write_value(row_dict, "edit_link", edit_link) + self.mark_modified(row_dict) return row_dict def show_edit_url(self, row): return row['marked_for_edit'] == '[+] Marked' + def write_value(self, row, key, value): + # Undo the implicitly added tags + if key == "tags": + value = value[2:] + if row["poster_moment"]: + value = value[1:] + return super().write_value(row, key, value) + + def create_row(self, row): + return self._create_row(row.sheet_name, row.id) + class SheetsArchiveMiddleware(SheetsEventsMiddleware): # Archive sheet is similar to events sheet but is missing some columns. + header_rows = 3 column_map = { 'event_start': 0, 'event_end': 1,