Add Audit Logging for several endpoints

- Use transactions for DB commits so that an edit and its audit log entry are written atomically (no edits without audit records)

Endpoints Supported:
- Manual Link
- Reset Row
- Update Row
pull/413/head
ZeldaZach 4 months ago committed by Mike Lang
parent fd78ff288e
commit c378a1e4ab

@ -104,6 +104,18 @@ CREATE TABLE events (
-- Index on state, since that's almost always what we're querying on besides id -- Index on state, since that's almost always what we're querying on besides id
CREATE INDEX event_state ON events (state); CREATE INDEX event_state ON events (state);
-- Table for recording each "edit" made to a video, written by thrimshim.
-- This is mainly a just-in-case thing so we can work out when something was changed,
-- and change it back if needed. More about accidents than security.
CREATE TABLE events_edits_audit_log (
	-- When the audit entry was written.
	time TIMESTAMP NOT NULL DEFAULT NOW(),
	-- id of the events row that was edited.
	id TEXT NOT NULL,
	-- Which API action performed the edit, e.g. "update-row", "manual-link", "reset-row".
	api_action TEXT NOT NULL,
	-- Identity (email) of the authenticated editor who made the change.
	editor TEXT NOT NULL,
	-- Row contents before and after the edit, serialized to JSON.
	-- Either may be NULL when that side wasn't captured
	-- (e.g. reset-row records neither, manual-link records only new_data).
	old_data JSONB,
	new_data JSONB
);
CREATE TABLE nodes ( CREATE TABLE nodes (
name TEXT PRIMARY KEY, name TEXT PRIMARY KEY,
url TEXT NOT NULL, url TEXT NOT NULL,

@ -66,7 +66,7 @@ def authenticate(f):
if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']: if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']:
raise ValueError('Wrong issuer.') raise ValueError('Wrong issuer.')
except ValueError: except ValueError:
return 'Invalid token. Access denied.', 403 return 'Invalid token. Access denied.', 403
# check whether user is in the database # check whether user is in the database
email = idinfo['email'].lower() email = idinfo['email'].lower()
@ -78,7 +78,7 @@ def authenticate(f):
row = results.fetchone() row = results.fetchone()
if row is None: if row is None:
return 'Unknown user. Access denied.', 403 return 'Unknown user. Access denied.', 403
return f(*args, editor=email, **kwargs) return f(*args, editor=email, **kwargs)
return auth_wrapper return auth_wrapper
@ -128,14 +128,14 @@ def get_all_rows():
} }
rows.append(row) rows.append(row)
logging.info('All rows fetched') logging.info('All rows fetched')
return json.dumps(rows) return to_json(rows)
@app.route('/thrimshim/defaults') @app.route('/thrimshim/defaults')
@request_stats @request_stats
def get_defaults(): def get_defaults():
"""Get default info needed by thrimbletrimmer when not loading a specific row.""" """Get default info needed by thrimbletrimmer when not loading a specific row."""
return json.dumps({ return to_json({
"video_channel": app.default_channel, "video_channel": app.default_channel,
"bustime_start": app.bustime_start, "bustime_start": app.bustime_start,
"title_prefix": app.title_header, "title_prefix": app.title_header,
@ -159,6 +159,18 @@ def get_transitions():
] ]
def to_json(obj):
	"""Serialize obj to a JSON string, converting types found in database rows.

	In addition to the JSON-native types, supports:
	- datetime.datetime: converted to an ISO 8601 string
	- datetime.timedelta: converted to total seconds as a float
	- bytes / memoryview: converted to a base64-encoded string
	Raises TypeError for any other non-serializable type.
	"""
	def convert(value):
		# json.dumps only calls this for values it can't natively serialize.
		if isinstance(value, datetime.datetime):
			return value.isoformat()
		if isinstance(value, datetime.timedelta):
			return value.total_seconds()
		if isinstance(value, (memoryview, bytes)):
			return base64.b64encode(bytes(value)).decode()
		raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}")
	return json.dumps(obj, default=convert)
@app.route('/thrimshim/<ident>', methods=['GET']) @app.route('/thrimshim/<ident>', methods=['GET'])
@request_stats @request_stats
def get_row(ident): def get_row(ident):
@ -177,7 +189,7 @@ def get_row(ident):
is_archive = response["sheet_name"] == app.archive_sheet is_archive = response["sheet_name"] == app.archive_sheet
response['id'] = str(response['id']) response['id'] = str(response['id'])
if response["video_channel"] is None: if response["video_channel"] is None:
response["video_channel"] = app.default_channel response["video_channel"] = app.default_channel
# Unwrap end time (dashed, value) to just value # Unwrap end time (dashed, value) to just value
@ -247,15 +259,7 @@ def get_row(ident):
logging.info('Row {} fetched'.format(ident)) logging.info('Row {} fetched'.format(ident))
def convert(value): return to_json(response)
if isinstance(value, datetime.datetime):
return value.isoformat()
if isinstance(value, datetime.timedelta):
return value.total_seconds()
if isinstance(value, memoryview) or isinstance(value, bytes):
return base64.b64encode(bytes(value)).decode()
raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}")
return json.dumps(response, default=convert)
@app.route('/thrimshim/<ident>', methods=['POST']) @app.route('/thrimshim/<ident>', methods=['POST'])
@ -265,7 +269,7 @@ def update_row(ident, editor=None):
"""Updates row of database with id = ident with the edit columns in new_row.""" """Updates row of database with id = ident with the edit columns in new_row."""
new_row = flask.request.json new_row = flask.request.json
override_changes = new_row.get('override_changes', False) override_changes = new_row.get('override_changes', False)
state_columns = ['state', 'uploader', 'error', 'video_link'] state_columns = ['state', 'uploader', 'error', 'video_link']
# These have to be set before a video can be set as 'EDITED' # These have to be set before a video can be set as 'EDITED'
non_null_columns = [ non_null_columns = [
'upload_location', 'video_ranges', 'video_transitions', 'upload_location', 'video_ranges', 'video_transitions',
@ -297,175 +301,181 @@ def update_row(ident, editor=None):
for extra in extras: for extra in extras:
del new_row[extra] del new_row[extra]
# Check a row with id = ident is in the database # Everything that follows happens in a single transaction
conn = app.db_manager.get_conn() with app.db_manager.get_conn() as conn:
built_query = sql.SQL("""
SELECT id, state, {} # Check a row with id = ident is in the database.
FROM events # Lock the row to prevent concurrent updates while we check the transition validity.
WHERE id = %s built_query = sql.SQL("""
""").format(sql.SQL(', ').join( SELECT id, state, {}
sql.Identifier(key) for key in sheet_columns FROM events
)) WHERE id = %s
results = database.query(conn, built_query, ident) FOR UPDATE
old_row = results.fetchone()._asdict() """).format(sql.SQL(', ').join(
if old_row is None: sql.Identifier(key) for key in sheet_columns
return 'Row {} not found'.format(ident), 404 ))
assert old_row['id'] == ident results = database.query(conn, built_query, ident)
old_row = results.fetchone()._asdict()
is_archive = old_row["sheet_name"] == app.archive_sheet if old_row is None:
return 'Row {} not found'.format(ident), 404
# archive events skip title and description munging assert old_row['id'] == ident
if not is_archive:
is_archive = old_row["sheet_name"] == app.archive_sheet
playlists = database.query(conn, """
SELECT playlist_id, name, tags # archive events skip title and description munging
FROM playlists if not is_archive:
WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL
""") playlists = database.query(conn, """
# Filter for matching playlists for this video SELECT playlist_id, name, tags
playlists = [ FROM playlists
playlist for playlist in playlists WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL
if all( """)
tag.lower() in [t.lower() for t in old_row['tags']] # Filter for matching playlists for this video
for tag in playlist.tags playlists = [
) playlist for playlist in playlists
] if all(
tag.lower() in [t.lower() for t in old_row['tags']]
# Include headers and footers for tag in playlist.tags
new_row['video_title'] = app.title_header + new_row['video_title'] )
description_lines = []
if playlists:
# NOTE: If you change this format, you need to also change the regex that matches this
# on the GET handler.
description_lines.append(DESCRIPTION_PLAYLISTS_HEADER)
description_lines += [
"- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id)
for playlist in playlists
] ]
description_lines.append('') # blank line before footer
description_lines.append(app.description_footer)
new_row['video_description'] += "\n\n" + "\n".join(description_lines)
# Validate youtube requirements on title and description
if len(new_row['video_title']) > MAX_TITLE_LENGTH:
return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400
if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH:
return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400
for char in ['<', '>']:
if char in new_row['video_title']:
return 'Title may not contain a {} character'.format(char), 400
if char in new_row['video_description']:
return 'Description may not contain a {} character'.format(char), 400
# Validate and convert video ranges and transitions.
num_ranges = len(new_row['video_ranges'])
if num_ranges == 0:
return 'Ranges must contain at least one range', 400
if len(new_row['video_transitions']) != num_ranges - 1:
return 'There must be exactly {} transitions for {} ranges'.format(
num_ranges - 1, num_ranges,
)
for start, end in new_row['video_ranges']:
if start > end:
return 'Range start must be less than end', 400
# We need these to be tuples not lists for psycopg2 to do the right thing,
# but since they come in as JSON they are currently lists.
new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']]
new_row['video_transitions'] = [
None if transition is None else tuple(transition)
for transition in new_row['video_transitions']
]
# Convert binary fields from base64 and do basic validation of contents # Include headers and footers
if new_row.get('thumbnail_image') is not None: new_row['video_title'] = app.title_header + new_row['video_title']
if new_row['thumbnail_mode'] != 'CUSTOM': description_lines = []
return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400 if playlists:
try: # NOTE: If you change this format, you need to also change the regex that matches this
new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image']) # on the GET handler.
except binascii.Error: description_lines.append(DESCRIPTION_PLAYLISTS_HEADER)
return 'thumbnail_image must be valid base64', 400 description_lines += [
# check for PNG file header "- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id)
if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'): for playlist in playlists
return 'thumbnail_image must be a PNG', 400 ]
description_lines.append('') # blank line before footer
if new_row['state'] == 'MODIFIED': description_lines.append(app.description_footer)
if old_row['state'] not in ['DONE', 'MODIFIED']: new_row['video_description'] += "\n\n" + "\n".join(description_lines)
return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403
elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']: # Validate youtube requirements on title and description
return 'Video already published', 403 if len(new_row['video_title']) > MAX_TITLE_LENGTH:
return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400
# check whether row has been changed in the sheet since editing has begun if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH:
changes = '' return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400
for column in sheet_columns: for char in ['<', '>']:
if column == "event_end": if char in new_row['video_title']:
# convert (dashed, value) to value return 'Title may not contain a {} character'.format(char), 400
old_row[column] = old_row[column][1] if char in new_row['video_description']:
if isinstance(old_row[column], datetime.datetime): return 'Description may not contain a {} character'.format(char), 400
old_row[column] = old_row[column].isoformat() # Validate and convert video ranges and transitions.
def normalize(value): num_ranges = len(new_row['video_ranges'])
if isinstance(value, list): if num_ranges == 0:
return sorted(map(normalize, value)) return 'Ranges must contain at least one range', 400
if value is None: if len(new_row['video_transitions']) != num_ranges - 1:
return None return 'There must be exactly {} transitions for {} ranges'.format(
return value.lower().strip() num_ranges - 1, num_ranges,
if normalize(new_row[column]) != normalize(old_row[column]): )
changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column]) for start, end in new_row['video_ranges']:
if changes and not override_changes: if start > end:
return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409 return 'Range start must be less than end', 400
# We need these to be tuples not lists for psycopg2 to do the right thing,
if new_row['state'] == 'MODIFIED': # but since they come in as JSON they are currently lists.
missing = [] new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']]
# Modifying published rows is more limited, we ignore all other fields. new_row['video_transitions'] = [
for column in set(modifiable_columns) & set(non_null_columns): None if transition is None else tuple(transition)
if new_row.get(column) is None: for transition in new_row['video_transitions']
missing.append(column) ]
if missing:
return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400
build_query = sql.SQL("""
UPDATE events
SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {}
WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED')
""").format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in set(modifiable_columns) & set(new_row)
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video changed state while we were updating - maybe it was reset?', 403
else: # Convert binary fields from base64 and do basic validation of contents
# handle state columns if new_row.get('thumbnail_image') is not None:
if new_row['state'] == 'EDITED': if new_row['thumbnail_mode'] != 'CUSTOM':
return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400
try:
new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image'])
except binascii.Error:
return 'thumbnail_image must be valid base64', 400
# check for PNG file header
if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'):
return 'thumbnail_image must be a PNG', 400
if new_row['state'] == 'MODIFIED':
if old_row['state'] not in ['DONE', 'MODIFIED']:
return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403
elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']:
return 'Video already published', 403
# check whether row has been changed in the sheet since editing has begun
changes = ''
for column in sheet_columns:
if column == "event_end":
# convert (dashed, value) to value
old_row[column] = old_row[column][1]
if isinstance(old_row[column], datetime.datetime):
old_row[column] = old_row[column].isoformat()
def normalize(value):
if isinstance(value, list):
return sorted(map(normalize, value))
if value is None:
return None
return value.lower().strip()
if normalize(new_row[column]) != normalize(old_row[column]):
changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column])
if changes and not override_changes:
return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409
if new_row['state'] == 'MODIFIED':
missing = [] missing = []
for column in non_null_columns: # Modifying published rows is more limited, we ignore all other fields.
if new_row[column] is None: for column in set(modifiable_columns) & set(non_null_columns):
if new_row.get(column) is None:
missing.append(column) missing.append(column)
if missing: if missing:
return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400 return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400
if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive: build_query = sql.SQL("""
return 'Video title must not be blank', 400 UPDATE events
elif new_row['state'] != 'UNEDITED': SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {}
return 'Invalid state {}'.format(new_row['state']), 400 WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED')
new_row['uploader'] = None """).format(sql.SQL(", ").join(
new_row['error'] = None sql.SQL("{} = {}").format(
new_row['editor'] = editor sql.Identifier(column), database.get_column_placeholder(column),
new_row['edit_time'] = datetime.datetime.utcnow() ) for column in set(modifiable_columns) & set(new_row)
))
# actually update database result = database.query(conn, build_query, id=ident, **new_row)
build_query = sql.SQL(""" if result.rowcount != 1:
UPDATE events return 'Video changed state while we were updating - maybe it was reset?', 409
SET {}
WHERE id = %(id)s else:
AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')""" # handle state columns
).format(sql.SQL(", ").join( if new_row['state'] == 'EDITED':
sql.SQL("{} = {}").format( missing = []
sql.Identifier(column), database.get_column_placeholder(column), for column in non_null_columns:
) for column in new_row.keys() if column not in sheet_columns if new_row[column] is None:
)) missing.append(column)
result = database.query(conn, build_query, id=ident, **new_row) if missing:
if result.rowcount != 1: return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400
return 'Video likely already published', 403 if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive:
return 'Video title must not be blank', 400
elif new_row['state'] != 'UNEDITED':
return 'Invalid state {}'.format(new_row['state']), 400
new_row['uploader'] = None
new_row['error'] = None
new_row['editor'] = editor
new_row['edit_time'] = datetime.datetime.utcnow()
# actually update database
build_query = sql.SQL("""
UPDATE events
SET {}
WHERE id = %(id)s
AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')"""
).format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in new_row.keys() if column not in sheet_columns
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video likely already published', 403
_write_audit_log(conn, ident, "update-row", editor, old_row, new_row)
logging.info('Row {} updated to state {}'.format(ident, new_row['state'])) logging.info('Row {} updated to state {}'.format(ident, new_row['state']))
return '' return ''
@ -474,7 +484,7 @@ def update_row(ident, editor=None):
@request_stats @request_stats
@authenticate @authenticate
def manual_link(ident, editor=None): def manual_link(ident, editor=None):
"""Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the """Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the
upload_location is 'manual' or 'youtube-manual'.""" upload_location is 'manual' or 'youtube-manual'."""
link = flask.request.json['link'] link = flask.request.json['link']
upload_location = flask.request.json.get('upload_location', 'manual') upload_location = flask.request.json.get('upload_location', 'manual')
@ -490,28 +500,40 @@ def manual_link(ident, editor=None):
else: else:
return 'Upload location must be "manual" or "youtube-manual"', 400 return 'Upload location must be "manual" or "youtube-manual"', 400
conn = app.db_manager.get_conn() with app.db_manager.get_conn() as conn:
results = database.query(conn, """ results = database.query(conn, """
SELECT id, state SELECT id, state
FROM events FROM events
WHERE id = %s""", ident) WHERE id = %s
old_row = results.fetchone() FOR UPDATE
if old_row is None: """, ident)
return 'Row {} not found'.format(ident), 404 old_row = results.fetchone()
if old_row.state != 'UNEDITED': if old_row is None:
return 'Invalid state {} for manual video link'.format(old_row.state), 403 return 'Row {} not found'.format(ident), 404
now = datetime.datetime.utcnow() if old_row.state != 'UNEDITED':
# note we force thumbnail mode of manual uploads to always be NONE, return 'Invalid state {} for manual video link'.format(old_row.state), 403
# since they might not be a video we actually control at all, or might not even be on youtube. now = datetime.datetime.utcnow()
results = database.query(conn, """ # note we force thumbnail mode of manual uploads to always be NONE,
UPDATE events # since they might not be a video we actually control at all, or might not even be on youtube.
SET state='DONE', upload_location = %s, video_link = %s, video_id = %s, result = database.query(conn, """
editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE' UPDATE events
WHERE id = %s AND state = 'UNEDITED' SET state='DONE', upload_location = %s, video_link = %s, video_id = %s,
""", upload_location, link, video_id, editor, now, now, ident) editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE'
WHERE id = %s AND state = 'UNEDITED'
""", upload_location, link, video_id, editor, now, now, ident)
if result.rowcount != 1:
return 'Video changed state while we were updating - maybe it was reset?', 409
_write_audit_log(conn, ident, "manual-link", editor, new_row={
"state": "DONE",
"upload_location": upload_location,
"video_link": link,
"video_id": video_id,
"editor": editor,
"thumbnail_mode": None,
})
logging.info("Row {} video_link set to {}".format(ident, link)) logging.info("Row {} video_link set to {}".format(ident, link))
return '' return ''
@app.route('/thrimshim/reset/<ident>', methods=['POST']) @app.route('/thrimshim/reset/<ident>', methods=['POST'])
@request_stats @request_stats
@ -523,23 +545,31 @@ def reset_row(ident, editor=None):
(state is UNEDITED, EDITED or CLAIMED) (state is UNEDITED, EDITED or CLAIMED)
""" """
force = (flask.request.args.get('force', '').lower() == "true") force = (flask.request.args.get('force', '').lower() == "true")
conn = app.db_manager.get_conn() with app.db_manager.get_conn() as conn:
query = """ query = """
UPDATE events UPDATE events
SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL, SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL,
uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL, uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL,
last_modified = NULL last_modified = NULL
WHERE id = %s {} WHERE id = %s {}
""".format( """.format(
"" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')", "" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')",
) )
results = database.query(conn, query, ident) results = database.query(conn, query, ident)
if results.rowcount != 1: if results.rowcount != 1:
return 'Row id = {} not found or not in cancellable state'.format(ident), 404 return 'Row id = {} not found or not in cancellable state'.format(ident), 404
_write_audit_log(conn, ident, "reset-row", editor)
logging.info("Row {} reset to 'UNEDITED'".format(ident)) logging.info("Row {} reset to 'UNEDITED'".format(ident))
return '' return ''
def _write_audit_log(conn, ident, api_action, editor, old_row=None, new_row=None):
	"""Insert an audit log entry recording an edit made via the API.

	Records which editor performed which api_action on event `ident`, with
	optional before/after row contents. Must be called on the same connection
	(and hence in the same transaction) as the edit itself, so the edit and
	its audit entry commit or roll back together.

	old_row / new_row are optional dicts of row state. When absent we pass
	SQL NULL rather than to_json(None) (the JSON literal 'null'), so that
	`old_data IS NULL` / `new_data IS NULL` queries behave as expected.
	"""
	database.query(conn, """
		INSERT INTO events_edits_audit_log (id, api_action, editor, old_data, new_data)
		VALUES (%(id)s, %(api_action)s, %(editor)s, %(old_row)s, %(new_row)s)
	""",
		id=ident,
		api_action=api_action,
		editor=editor,
		old_row=to_json(old_row) if old_row is not None else None,
		new_row=to_json(new_row) if new_row is not None else None,
	)
@app.route('/thrimshim/bus/<channel>') @app.route('/thrimshim/bus/<channel>')
@request_stats @request_stats
def get_odometer(channel): def get_odometer(channel):

Loading…
Cancel
Save