Support archive sync in sheetsync again

pull/401/head
Mike Lang authored 3 months ago; committed by Mike Lang
parent a3aaa37bb0
commit 96181fd875

@@ -446,7 +446,7 @@
backend: "sheets",
creds: "/etc/sheet-creds.json",
sheet_id: $.sheet_id,
allocate_ids: true,
allocate_ids: ! $.sheet_reverse_sync,
reverse_sync: $.sheet_reverse_sync,
},
local sync_sheet = [
@@ -462,7 +462,18 @@
type: "playlists",
worksheets: [$.playlist_worksheet],
},
],
] + (if $.archive_worksheet == null then [] else [
sync_sheet_base + {
name: "sheet-archive",
type: "archive",
worksheets: [$.archive_worksheet],
edit_url: $.edit_url,
bustime_start: $.bustime_start,
// archive is never reverse sync
allocate_ids: true,
reverse_sync: false,
},
]),
local sync_streamlog_base = {
backend: "streamlog",
creds: "/etc/streamlog-token.txt",

@@ -17,7 +17,7 @@ import common.dateutil
from common.database import DBManager, query, get_column_placeholder
from common.sheets import Sheets as SheetsClient
from .sheets import SheetsEventsMiddleware, SheetsPlaylistsMiddleware
from .sheets import SheetsEventsMiddleware, SheetsPlaylistsMiddleware, SheetsArchiveMiddleware
from .streamlog import StreamLogClient, StreamLogEventsMiddleware, StreamLogPlaylistsMiddleware
sheets_synced = prom.Counter(
@@ -304,6 +304,25 @@ class EventsSync(SheetSync):
super().sync_row(sheet_row, db_row)
class ArchiveSync(EventsSync):
# Archive events are a special case of event with fewer input columns.
# The other input columns default to empty string in the database.
input_columns = {
'sheet_name',
'event_start',
'event_end',
'description',
'notes',
}
output_columns = {
'state',
'error',
}
# Slower poll rate than events to avoid using large amounts of quota
retry_interval = 20
error_retry_interval = 20
class PlaylistsSync(SheetSync):
# Slower poll rate than events to avoid using large amounts of quota
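
The base SheetSync class (not shown in this diff) presumably consumes these two sets to decide which way each column flows: input_columns from the sheet into the database, output_columns from the database back into the sheet. A rough sketch of that idea, using a hypothetical helper rather than the real implementation:

def sketch_sync_row(sync, sheet_row, db_row):
    # Hypothetical illustration only: columns the sheet owns flow sheet -> DB...
    db_updates = {
        col: sheet_row[col]
        for col in sync.input_columns
        if sheet_row[col] != db_row[col]
    }
    # ...while state/error feedback flows DB -> sheet.
    sheet_updates = {
        col: db_row[col]
        for col in sync.output_columns
        if sheet_row[col] != db_row[col]
    }
    return db_updates, sheet_updates
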
@@ -336,7 +355,7 @@ class PlaylistsSync(SheetSync):
Always present:
name: A human identifier for this sync operation
backend: The data source. One of "sheets" or "streamlog"
type: What kind of data is being synced. One of "events" or "playlists"
type: What kind of data is being synced. One of "events", "playlists" or "archive"
When backend is "sheets":
creds: path to credentials JSON file containing "client_id", "client_secret" and "refresh_token"
sheet_id: The id of the Google Sheet to use
@@ -346,7 +365,7 @@ class PlaylistsSync(SheetSync):
reverse_sync: Boolean, optional. When true, enables an alternate mode
where all data is synced from the database to the sheet.
Only one sheetsync acting on the same sheet should have this enabled.
When type is "events":
When type is "events" or "archive":
edit_url: a format string for edit links, with {} as a placeholder for id
bustime_start: Timestamp string at which bustime is 00:00
When backend is "streamlog":
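
To make the documented sheets-backend fields concrete, here is an illustrative sync_configs entry for the new "archive" type, written as a Python dict (all values are invented for the example; the field meanings are exactly as documented above):

{
    "name": "sheet-archive",
    "backend": "sheets",
    "type": "archive",
    "creds": "/etc/sheet-creds.json",
    "sheet_id": "EXAMPLE-SHEET-ID",
    "worksheets": ["Archive"],
    "edit_url": "https://example.com/edit.html?id={}",
    "bustime_start": "1970-01-01T00:00:00",
    # archive is never reverse sync (per the jsonnet above)
    "allocate_ids": True,
    "reverse_sync": False,
}
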
@@ -402,8 +421,12 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0):
refresh_token=creds['refresh_token'],
)
allocate_ids = config.get("allocate_ids", False)
if config["type"] == "sheets":
middleware = SheetsEventsMiddleware(
if config["type"] in ("sheets", "archive"):
middleware_cls = {
"sheets": SheetsEventsMiddleware,
"archive": SheetsArchiveMiddleware,
}[config["type"]]
middleware = middleware_cls(
client,
config["sheet_id"],
config["worksheets"],
@@ -431,6 +454,8 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0):
middleware = StreamLogEventsMiddleware(client)
elif config["type"] == "playlists":
middleware = StreamLogPlaylistsMiddleware(client)
elif config["type"] == "archive":
raise ValueError("Archive sync is not compatible with streamlog")
else:
raise ValueError("Unknown type {!r}".format(config["type"]))
else:
@@ -439,6 +464,7 @@ def main(dbconnect, sync_configs, metrics_port=8005, backdoor_port=0):
sync_class = {
"events": EventsSync,
"playlists": PlaylistsSync,
"archive": ArchiveSync,
}[config["type"]]
reverse_sync = config.get("reverse_sync", False)
sync = sync_class(config["name"], middleware, stop, dbmanager, reverse_sync)

@@ -23,6 +23,9 @@ class SheetsMiddleware(Middleware):
# + (100 / RETRY_INTERVAL / SYNCS_PER_INACTIVE_CHECK) * (len(worksheets) - ACTIVE_SHEET_COUNT)
# For current values, this is 100/5 * 2 + 100/5/4 * 7 = 75
# Number of initial rows to ignore as they contain headers
header_rows = 1
# Maps DB column names (or general identifier, for non-DB columns) to sheet column indexes.
# id is required.
column_map = {
@@ -77,10 +80,10 @@ class SheetsMiddleware(Middleware):
for worksheet in worksheets:
rows = self.client.get_rows(self.sheet_id, worksheet)
for row_index, row in enumerate(rows):
# Skip first row (ie. the column titles).
# Skip first row or rows (ie. the column titles).
# Need to do it inside the loop and not eg. use rows[1:],
# because then row_index won't be correct.
if row_index == 0:
if row_index < self.header_rows:
continue
row = self.parse_row(worksheet, row_index, row)
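
Turning the header count into a class attribute means a middleware whose worksheet has more than one header row only needs to override a single value. A hypothetical subclass, not part of this commit:

class TwoHeaderRowMiddleware(SheetsMiddleware):
    # Hypothetical: this sheet has a title row plus a column-header row,
    # so skip the first two rows instead of one.
    header_rows = 2
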
@@ -116,7 +119,11 @@
def parse_row(self, worksheet, row_index, row):
"""Take a row as a sequence of columns, and return a dict {column: value}"""
row_dict = {'_parse_errors': []}
row_dict = {
"sheet_name": worksheet,
"index": row_index,
'_parse_errors': [],
}
for column, index in self.column_map.items():
if index >= len(row):
# Sheets omits trailing columns if they're all empty, so substitute empty string
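
With sheet_name and index now set up front, every parsed row carries its own location before the per-column loop runs. Roughly, parse_row() returns a dict shaped like this (the remaining keys depend on the subclass's column_map; values are illustrative):

{
    "sheet_name": "Day 1",  # the worksheet the row came from
    "index": 7,             # the row's position within that worksheet
    "_parse_errors": [],    # populated when a column fails to parse
    # ...plus one key per column_map entry, e.g. "event_start", "id", ...
}
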
@@ -286,6 +293,9 @@ class SheetsEventsMiddleware(SheetsMiddleware):
# As a special case, add some implicit tags to the tags column.
# We prepend these to make it slightly more consistent for the editor,
# ie. it's always DAY, CATEGORY, POSTER_MOMENT, CUSTOM
# This is only needed for full events (not the archive sheet),
# so only do it if we had a tags column in the first place.
if 'tags' in row_dict:
row_dict['tags'] = (
[
row_dict['category'], # category name
@@ -297,9 +307,6 @@ class SheetsEventsMiddleware(SheetsMiddleware):
# As a special case, treat an end time of "--" as equal to the start time.
if row_dict["event_end"] == "--":
row_dict["event_end"] = row_dict["event_start"]
# Always include row index and worksheet
row_dict["index"] = row_index
row_dict["sheet_name"] = worksheet
# Set edit link if marked for editing and start/end set.
# This prevents accidents / clicking the wrong row and provides
@@ -307,10 +314,30 @@
# Also clear it if it shouldn't be set.
# We do this here instead of in sync_row() because it's Sheets-specific logic
# that doesn't depend on the DB event in any way.
edit_link = self.edit_url.format(row['id']) if row['marked_for_edit'] == '[+] Marked' else ''
edit_link = self.edit_url.format(row['id']) if self.show_edit_url(row) else ''
if row['edit_link'] != edit_link:
logging.info("Updating sheet row {} with edit link {}".format(row['id'], edit_link))
self.write_value(row, "edit_link", edit_link)
self.mark_modified(row)
return row_dict
def show_edit_url(self, row):
return row['marked_for_edit'] == '[+] Marked'
class SheetsArchiveMiddleware(SheetsEventsMiddleware):
# Archive sheet is similar to events sheet but is missing some columns.
column_map = {
'event_start': 0,
'event_end': 1,
'description': 2,
'state': 3,
'notes': 4,
'edit_link': 6,
'error': 7,
'id': 8,
}
def show_edit_url(self, row):
return row['event_start'] is not None and row['event_end'] is not None
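
A quick sketch of how that column_map reads a raw archive-sheet row (values invented; the start/end strings presumably still go through the events middleware's time parsing before show_edit_url sees them):

raw_row = [
    "<event start>",       # event_start (column 0)
    "<event end>",         # event_end   (column 1)
    "Opening ceremonies",  # description (column 2)
    "DONE",                # state       (column 3)
    "",                    # notes       (column 4)
    "",                    # column 5 is not mapped
    "",                    # edit_link   (column 6)
    "",                    # error       (column 7)
    "some-id",             # id          (column 8)
]
# parse_row() yields the mapped dict plus sheet_name/index/_parse_errors,
# and show_edit_url() returns True once both start and end parse to non-None.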
