From c378a1e4ab4dea7b46a3dc792d6b41e6cd011b4d Mon Sep 17 00:00:00 2001 From: ZeldaZach Date: Sun, 21 Jul 2024 00:56:07 +0200 Subject: [PATCH] Add Audit Logging for several endpoints - Use transactions for DB commits to avoid audit-less logs Endpoints Supported: - Manual Link - Reset Row - Update Row --- postgres/schema.sql | 12 + thrimshim/thrimshim/main.py | 456 +++++++++++++++++++----------------- 2 files changed, 255 insertions(+), 213 deletions(-) diff --git a/postgres/schema.sql b/postgres/schema.sql index 7764163..23d1357 100644 --- a/postgres/schema.sql +++ b/postgres/schema.sql @@ -104,6 +104,18 @@ CREATE TABLE events ( -- Index on state, since that's almost always what we're querying on besides id CREATE INDEX event_state ON events (state); +-- Table for recording each "edit" made to a video, written by thrimshim. +-- This is mainly a just-in-case thing so we can work out when something was changed, +-- and change it back if needed. More about accidents than security. +CREATE TABLE events_edits_audit_log ( + time TIMESTAMP NOT NULL DEFAULT NOW(), + id TEXT NOT NULL, + api_action TEXT NOT NULL, + editor TEXT NOT NULL, + old_data JSONB, + new_data JSONB +); + CREATE TABLE nodes ( name TEXT PRIMARY KEY, url TEXT NOT NULL, diff --git a/thrimshim/thrimshim/main.py b/thrimshim/thrimshim/main.py index 27ef889..9fba753 100644 --- a/thrimshim/thrimshim/main.py +++ b/thrimshim/thrimshim/main.py @@ -66,7 +66,7 @@ def authenticate(f): if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']: raise ValueError('Wrong issuer.') except ValueError: - return 'Invalid token. Access denied.', 403 + return 'Invalid token. Access denied.', 403 # check whether user is in the database email = idinfo['email'].lower() @@ -78,7 +78,7 @@ def authenticate(f): row = results.fetchone() if row is None: return 'Unknown user. Access denied.', 403 - + return f(*args, editor=email, **kwargs) return auth_wrapper @@ -128,14 +128,14 @@ def get_all_rows(): } rows.append(row) logging.info('All rows fetched') - return json.dumps(rows) + return to_json(rows) @app.route('/thrimshim/defaults') @request_stats def get_defaults(): """Get default info needed by thrimbletrimmer when not loading a specific row.""" - return json.dumps({ + return to_json({ "video_channel": app.default_channel, "bustime_start": app.bustime_start, "title_prefix": app.title_header, @@ -159,6 +159,18 @@ def get_transitions(): ] +def to_json(obj): + def convert(value): + if isinstance(value, datetime.datetime): + return value.isoformat() + if isinstance(value, datetime.timedelta): + return value.total_seconds() + if isinstance(value, memoryview) or isinstance(value, bytes): + return base64.b64encode(bytes(value)).decode() + raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}") + return json.dumps(obj, default=convert) + + @app.route('/thrimshim/', methods=['GET']) @request_stats def get_row(ident): @@ -177,7 +189,7 @@ def get_row(ident): is_archive = response["sheet_name"] == app.archive_sheet - response['id'] = str(response['id']) + response['id'] = str(response['id']) if response["video_channel"] is None: response["video_channel"] = app.default_channel # Unwrap end time (dashed, value) to just value @@ -247,15 +259,7 @@ def get_row(ident): logging.info('Row {} fetched'.format(ident)) - def convert(value): - if isinstance(value, datetime.datetime): - return value.isoformat() - if isinstance(value, datetime.timedelta): - return value.total_seconds() - if isinstance(value, memoryview) or isinstance(value, bytes): - return base64.b64encode(bytes(value)).decode() - raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}") - return json.dumps(response, default=convert) + return to_json(response) @app.route('/thrimshim/', methods=['POST']) @@ -265,7 +269,7 @@ def update_row(ident, editor=None): """Updates row of database with id = ident with the edit columns in new_row.""" new_row = flask.request.json override_changes = new_row.get('override_changes', False) - state_columns = ['state', 'uploader', 'error', 'video_link'] + state_columns = ['state', 'uploader', 'error', 'video_link'] # These have to be set before a video can be set as 'EDITED' non_null_columns = [ 'upload_location', 'video_ranges', 'video_transitions', @@ -297,175 +301,181 @@ def update_row(ident, editor=None): for extra in extras: del new_row[extra] - # Check a row with id = ident is in the database - conn = app.db_manager.get_conn() - built_query = sql.SQL(""" - SELECT id, state, {} - FROM events - WHERE id = %s - """).format(sql.SQL(', ').join( - sql.Identifier(key) for key in sheet_columns - )) - results = database.query(conn, built_query, ident) - old_row = results.fetchone()._asdict() - if old_row is None: - return 'Row {} not found'.format(ident), 404 - assert old_row['id'] == ident - - is_archive = old_row["sheet_name"] == app.archive_sheet - - # archive events skip title and description munging - if not is_archive: - - playlists = database.query(conn, """ - SELECT playlist_id, name, tags - FROM playlists - WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL - """) - # Filter for matching playlists for this video - playlists = [ - playlist for playlist in playlists - if all( - tag.lower() in [t.lower() for t in old_row['tags']] - for tag in playlist.tags - ) - ] - - # Include headers and footers - new_row['video_title'] = app.title_header + new_row['video_title'] - description_lines = [] - if playlists: - # NOTE: If you change this format, you need to also change the regex that matches this - # on the GET handler. - description_lines.append(DESCRIPTION_PLAYLISTS_HEADER) - description_lines += [ - "- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id) - for playlist in playlists + # Everything that follows happens in a single transaction + with app.db_manager.get_conn() as conn: + + # Check a row with id = ident is in the database. + # Lock the row to prevent concurrent updates while we check the transition validity. + built_query = sql.SQL(""" + SELECT id, state, {} + FROM events + WHERE id = %s + FOR UPDATE + """).format(sql.SQL(', ').join( + sql.Identifier(key) for key in sheet_columns + )) + results = database.query(conn, built_query, ident) + old_row = results.fetchone()._asdict() + if old_row is None: + return 'Row {} not found'.format(ident), 404 + assert old_row['id'] == ident + + is_archive = old_row["sheet_name"] == app.archive_sheet + + # archive events skip title and description munging + if not is_archive: + + playlists = database.query(conn, """ + SELECT playlist_id, name, tags + FROM playlists + WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL + """) + # Filter for matching playlists for this video + playlists = [ + playlist for playlist in playlists + if all( + tag.lower() in [t.lower() for t in old_row['tags']] + for tag in playlist.tags + ) ] - description_lines.append('') # blank line before footer - description_lines.append(app.description_footer) - new_row['video_description'] += "\n\n" + "\n".join(description_lines) - - # Validate youtube requirements on title and description - if len(new_row['video_title']) > MAX_TITLE_LENGTH: - return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400 - if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH: - return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400 - for char in ['<', '>']: - if char in new_row['video_title']: - return 'Title may not contain a {} character'.format(char), 400 - if char in new_row['video_description']: - return 'Description may not contain a {} character'.format(char), 400 - # Validate and convert video ranges and transitions. - num_ranges = len(new_row['video_ranges']) - if num_ranges == 0: - return 'Ranges must contain at least one range', 400 - if len(new_row['video_transitions']) != num_ranges - 1: - return 'There must be exactly {} transitions for {} ranges'.format( - num_ranges - 1, num_ranges, - ) - for start, end in new_row['video_ranges']: - if start > end: - return 'Range start must be less than end', 400 - # We need these to be tuples not lists for psycopg2 to do the right thing, - # but since they come in as JSON they are currently lists. - new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']] - new_row['video_transitions'] = [ - None if transition is None else tuple(transition) - for transition in new_row['video_transitions'] - ] - # Convert binary fields from base64 and do basic validation of contents - if new_row.get('thumbnail_image') is not None: - if new_row['thumbnail_mode'] != 'CUSTOM': - return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400 - try: - new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image']) - except binascii.Error: - return 'thumbnail_image must be valid base64', 400 - # check for PNG file header - if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'): - return 'thumbnail_image must be a PNG', 400 - - if new_row['state'] == 'MODIFIED': - if old_row['state'] not in ['DONE', 'MODIFIED']: - return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403 - elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']: - return 'Video already published', 403 - - # check whether row has been changed in the sheet since editing has begun - changes = '' - for column in sheet_columns: - if column == "event_end": - # convert (dashed, value) to value - old_row[column] = old_row[column][1] - if isinstance(old_row[column], datetime.datetime): - old_row[column] = old_row[column].isoformat() - def normalize(value): - if isinstance(value, list): - return sorted(map(normalize, value)) - if value is None: - return None - return value.lower().strip() - if normalize(new_row[column]) != normalize(old_row[column]): - changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column]) - if changes and not override_changes: - return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409 - - if new_row['state'] == 'MODIFIED': - missing = [] - # Modifying published rows is more limited, we ignore all other fields. - for column in set(modifiable_columns) & set(non_null_columns): - if new_row.get(column) is None: - missing.append(column) - if missing: - return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400 - build_query = sql.SQL(""" - UPDATE events - SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {} - WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED') - """).format(sql.SQL(", ").join( - sql.SQL("{} = {}").format( - sql.Identifier(column), database.get_column_placeholder(column), - ) for column in set(modifiable_columns) & set(new_row) - )) - result = database.query(conn, build_query, id=ident, **new_row) - if result.rowcount != 1: - return 'Video changed state while we were updating - maybe it was reset?', 403 + # Include headers and footers + new_row['video_title'] = app.title_header + new_row['video_title'] + description_lines = [] + if playlists: + # NOTE: If you change this format, you need to also change the regex that matches this + # on the GET handler. + description_lines.append(DESCRIPTION_PLAYLISTS_HEADER) + description_lines += [ + "- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id) + for playlist in playlists + ] + description_lines.append('') # blank line before footer + description_lines.append(app.description_footer) + new_row['video_description'] += "\n\n" + "\n".join(description_lines) + + # Validate youtube requirements on title and description + if len(new_row['video_title']) > MAX_TITLE_LENGTH: + return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400 + if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH: + return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400 + for char in ['<', '>']: + if char in new_row['video_title']: + return 'Title may not contain a {} character'.format(char), 400 + if char in new_row['video_description']: + return 'Description may not contain a {} character'.format(char), 400 + # Validate and convert video ranges and transitions. + num_ranges = len(new_row['video_ranges']) + if num_ranges == 0: + return 'Ranges must contain at least one range', 400 + if len(new_row['video_transitions']) != num_ranges - 1: + return 'There must be exactly {} transitions for {} ranges'.format( + num_ranges - 1, num_ranges, + ) + for start, end in new_row['video_ranges']: + if start > end: + return 'Range start must be less than end', 400 + # We need these to be tuples not lists for psycopg2 to do the right thing, + # but since they come in as JSON they are currently lists. + new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']] + new_row['video_transitions'] = [ + None if transition is None else tuple(transition) + for transition in new_row['video_transitions'] + ] - else: - # handle state columns - if new_row['state'] == 'EDITED': + # Convert binary fields from base64 and do basic validation of contents + if new_row.get('thumbnail_image') is not None: + if new_row['thumbnail_mode'] != 'CUSTOM': + return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400 + try: + new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image']) + except binascii.Error: + return 'thumbnail_image must be valid base64', 400 + # check for PNG file header + if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'): + return 'thumbnail_image must be a PNG', 400 + + if new_row['state'] == 'MODIFIED': + if old_row['state'] not in ['DONE', 'MODIFIED']: + return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403 + elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']: + return 'Video already published', 403 + + # check whether row has been changed in the sheet since editing has begun + changes = '' + for column in sheet_columns: + if column == "event_end": + # convert (dashed, value) to value + old_row[column] = old_row[column][1] + if isinstance(old_row[column], datetime.datetime): + old_row[column] = old_row[column].isoformat() + def normalize(value): + if isinstance(value, list): + return sorted(map(normalize, value)) + if value is None: + return None + return value.lower().strip() + if normalize(new_row[column]) != normalize(old_row[column]): + changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column]) + if changes and not override_changes: + return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409 + + if new_row['state'] == 'MODIFIED': missing = [] - for column in non_null_columns: - if new_row[column] is None: + # Modifying published rows is more limited, we ignore all other fields. + for column in set(modifiable_columns) & set(non_null_columns): + if new_row.get(column) is None: missing.append(column) if missing: - return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400 - if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive: - return 'Video title must not be blank', 400 - elif new_row['state'] != 'UNEDITED': - return 'Invalid state {}'.format(new_row['state']), 400 - new_row['uploader'] = None - new_row['error'] = None - new_row['editor'] = editor - new_row['edit_time'] = datetime.datetime.utcnow() - - # actually update database - build_query = sql.SQL(""" - UPDATE events - SET {} - WHERE id = %(id)s - AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')""" - ).format(sql.SQL(", ").join( - sql.SQL("{} = {}").format( - sql.Identifier(column), database.get_column_placeholder(column), - ) for column in new_row.keys() if column not in sheet_columns - )) - result = database.query(conn, build_query, id=ident, **new_row) - if result.rowcount != 1: - return 'Video likely already published', 403 - + return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400 + build_query = sql.SQL(""" + UPDATE events + SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {} + WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED') + """).format(sql.SQL(", ").join( + sql.SQL("{} = {}").format( + sql.Identifier(column), database.get_column_placeholder(column), + ) for column in set(modifiable_columns) & set(new_row) + )) + result = database.query(conn, build_query, id=ident, **new_row) + if result.rowcount != 1: + return 'Video changed state while we were updating - maybe it was reset?', 409 + + else: + # handle state columns + if new_row['state'] == 'EDITED': + missing = [] + for column in non_null_columns: + if new_row[column] is None: + missing.append(column) + if missing: + return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400 + if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive: + return 'Video title must not be blank', 400 + elif new_row['state'] != 'UNEDITED': + return 'Invalid state {}'.format(new_row['state']), 400 + new_row['uploader'] = None + new_row['error'] = None + new_row['editor'] = editor + new_row['edit_time'] = datetime.datetime.utcnow() + + # actually update database + build_query = sql.SQL(""" + UPDATE events + SET {} + WHERE id = %(id)s + AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')""" + ).format(sql.SQL(", ").join( + sql.SQL("{} = {}").format( + sql.Identifier(column), database.get_column_placeholder(column), + ) for column in new_row.keys() if column not in sheet_columns + )) + result = database.query(conn, build_query, id=ident, **new_row) + if result.rowcount != 1: + return 'Video likely already published', 403 + + _write_audit_log(conn, ident, "update-row", editor, old_row, new_row) + logging.info('Row {} updated to state {}'.format(ident, new_row['state'])) return '' @@ -474,7 +484,7 @@ def update_row(ident, editor=None): @request_stats @authenticate def manual_link(ident, editor=None): - """Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the + """Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the upload_location is 'manual' or 'youtube-manual'.""" link = flask.request.json['link'] upload_location = flask.request.json.get('upload_location', 'manual') @@ -490,28 +500,40 @@ def manual_link(ident, editor=None): else: return 'Upload location must be "manual" or "youtube-manual"', 400 - conn = app.db_manager.get_conn() - results = database.query(conn, """ - SELECT id, state - FROM events - WHERE id = %s""", ident) - old_row = results.fetchone() - if old_row is None: - return 'Row {} not found'.format(ident), 404 - if old_row.state != 'UNEDITED': - return 'Invalid state {} for manual video link'.format(old_row.state), 403 - now = datetime.datetime.utcnow() - # note we force thumbnail mode of manual uploads to always be NONE, - # since they might not be a video we actually control at all, or might not even be on youtube. - results = database.query(conn, """ - UPDATE events - SET state='DONE', upload_location = %s, video_link = %s, video_id = %s, - editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE' - WHERE id = %s AND state = 'UNEDITED' - """, upload_location, link, video_id, editor, now, now, ident) + with app.db_manager.get_conn() as conn: + results = database.query(conn, """ + SELECT id, state + FROM events + WHERE id = %s + FOR UPDATE + """, ident) + old_row = results.fetchone() + if old_row is None: + return 'Row {} not found'.format(ident), 404 + if old_row.state != 'UNEDITED': + return 'Invalid state {} for manual video link'.format(old_row.state), 403 + now = datetime.datetime.utcnow() + # note we force thumbnail mode of manual uploads to always be NONE, + # since they might not be a video we actually control at all, or might not even be on youtube. + result = database.query(conn, """ + UPDATE events + SET state='DONE', upload_location = %s, video_link = %s, video_id = %s, + editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE' + WHERE id = %s AND state = 'UNEDITED' + """, upload_location, link, video_id, editor, now, now, ident) + if result.rowcount != 1: + return 'Video changed state while we were updating - maybe it was reset?', 409 + _write_audit_log(conn, ident, "manual-link", editor, new_row={ + "state": "DONE", + "upload_location": upload_location, + "video_link": link, + "video_id": video_id, + "editor": editor, + "thumbnail_mode": None, + }) logging.info("Row {} video_link set to {}".format(ident, link)) - return '' - + return '' + @app.route('/thrimshim/reset/', methods=['POST']) @request_stats @@ -523,23 +545,31 @@ def reset_row(ident, editor=None): (state is UNEDITED, EDITED or CLAIMED) """ force = (flask.request.args.get('force', '').lower() == "true") - conn = app.db_manager.get_conn() - query = """ - UPDATE events - SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL, - uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL, - last_modified = NULL - WHERE id = %s {} - """.format( - "" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')", - ) - results = database.query(conn, query, ident) - if results.rowcount != 1: - return 'Row id = {} not found or not in cancellable state'.format(ident), 404 + with app.db_manager.get_conn() as conn: + query = """ + UPDATE events + SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL, + uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL, + last_modified = NULL + WHERE id = %s {} + """.format( + "" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')", + ) + results = database.query(conn, query, ident) + if results.rowcount != 1: + return 'Row id = {} not found or not in cancellable state'.format(ident), 404 + _write_audit_log(conn, ident, "reset-row", editor) logging.info("Row {} reset to 'UNEDITED'".format(ident)) return '' +def _write_audit_log(conn, ident, api_action, editor, old_row=None, new_row=None): + database.query(conn, """ + INSERT INTO events_edits_audit_log (id, api_action, editor, old_data, new_data) + VALUES (%(id)s, %(api_action)s, %(editor)s, %(old_row)s, %(new_row)s) + """, id=ident, api_action=api_action, editor=editor, old_row=to_json(old_row), new_row=to_json(new_row)) + + @app.route('/thrimshim/bus/') @request_stats def get_odometer(channel):