Add Audit Logging for several endpoints

- Use transactions for DB commits to avoid audit-less logs

Endpoints Supported:
- Manual Link
- Reset Row
- Update Row
pull/413/head
ZeldaZach 4 months ago committed by Mike Lang
parent fd78ff288e
commit c378a1e4ab

@ -104,6 +104,18 @@ CREATE TABLE events (
-- Index on state, since that's almost always what we're querying on besides id
CREATE INDEX event_state ON events (state);
-- Table for recording each "edit" made to a video, written by thrimshim.
-- This is mainly a just-in-case thing so we can work out when something was changed,
-- and change it back if needed. More about accidents than security.
CREATE TABLE events_edits_audit_log (
time TIMESTAMP NOT NULL DEFAULT NOW(),
id TEXT NOT NULL,
api_action TEXT NOT NULL,
editor TEXT NOT NULL,
old_data JSONB,
new_data JSONB
);
CREATE TABLE nodes (
name TEXT PRIMARY KEY,
url TEXT NOT NULL,

@ -66,7 +66,7 @@ def authenticate(f):
if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']:
raise ValueError('Wrong issuer.')
except ValueError:
return 'Invalid token. Access denied.', 403
return 'Invalid token. Access denied.', 403
# check whether user is in the database
email = idinfo['email'].lower()
@ -78,7 +78,7 @@ def authenticate(f):
row = results.fetchone()
if row is None:
return 'Unknown user. Access denied.', 403
return f(*args, editor=email, **kwargs)
return auth_wrapper
@ -128,14 +128,14 @@ def get_all_rows():
}
rows.append(row)
logging.info('All rows fetched')
return json.dumps(rows)
return to_json(rows)
@app.route('/thrimshim/defaults')
@request_stats
def get_defaults():
"""Get default info needed by thrimbletrimmer when not loading a specific row."""
return json.dumps({
return to_json({
"video_channel": app.default_channel,
"bustime_start": app.bustime_start,
"title_prefix": app.title_header,
@ -159,6 +159,18 @@ def get_transitions():
]
def to_json(obj):
def convert(value):
if isinstance(value, datetime.datetime):
return value.isoformat()
if isinstance(value, datetime.timedelta):
return value.total_seconds()
if isinstance(value, memoryview) or isinstance(value, bytes):
return base64.b64encode(bytes(value)).decode()
raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}")
return json.dumps(obj, default=convert)
@app.route('/thrimshim/<ident>', methods=['GET'])
@request_stats
def get_row(ident):
@ -177,7 +189,7 @@ def get_row(ident):
is_archive = response["sheet_name"] == app.archive_sheet
response['id'] = str(response['id'])
response['id'] = str(response['id'])
if response["video_channel"] is None:
response["video_channel"] = app.default_channel
# Unwrap end time (dashed, value) to just value
@ -247,15 +259,7 @@ def get_row(ident):
logging.info('Row {} fetched'.format(ident))
def convert(value):
if isinstance(value, datetime.datetime):
return value.isoformat()
if isinstance(value, datetime.timedelta):
return value.total_seconds()
if isinstance(value, memoryview) or isinstance(value, bytes):
return base64.b64encode(bytes(value)).decode()
raise TypeError(f"Can't convert object of type {value.__class__.__name__} to JSON: {value}")
return json.dumps(response, default=convert)
return to_json(response)
@app.route('/thrimshim/<ident>', methods=['POST'])
@ -265,7 +269,7 @@ def update_row(ident, editor=None):
"""Updates row of database with id = ident with the edit columns in new_row."""
new_row = flask.request.json
override_changes = new_row.get('override_changes', False)
state_columns = ['state', 'uploader', 'error', 'video_link']
state_columns = ['state', 'uploader', 'error', 'video_link']
# These have to be set before a video can be set as 'EDITED'
non_null_columns = [
'upload_location', 'video_ranges', 'video_transitions',
@ -297,175 +301,181 @@ def update_row(ident, editor=None):
for extra in extras:
del new_row[extra]
# Check a row with id = ident is in the database
conn = app.db_manager.get_conn()
built_query = sql.SQL("""
SELECT id, state, {}
FROM events
WHERE id = %s
""").format(sql.SQL(', ').join(
sql.Identifier(key) for key in sheet_columns
))
results = database.query(conn, built_query, ident)
old_row = results.fetchone()._asdict()
if old_row is None:
return 'Row {} not found'.format(ident), 404
assert old_row['id'] == ident
is_archive = old_row["sheet_name"] == app.archive_sheet
# archive events skip title and description munging
if not is_archive:
playlists = database.query(conn, """
SELECT playlist_id, name, tags
FROM playlists
WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL
""")
# Filter for matching playlists for this video
playlists = [
playlist for playlist in playlists
if all(
tag.lower() in [t.lower() for t in old_row['tags']]
for tag in playlist.tags
)
]
# Include headers and footers
new_row['video_title'] = app.title_header + new_row['video_title']
description_lines = []
if playlists:
# NOTE: If you change this format, you need to also change the regex that matches this
# on the GET handler.
description_lines.append(DESCRIPTION_PLAYLISTS_HEADER)
description_lines += [
"- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id)
for playlist in playlists
# Everything that follows happens in a single transaction
with app.db_manager.get_conn() as conn:
# Check a row with id = ident is in the database.
# Lock the row to prevent concurrent updates while we check the transition validity.
built_query = sql.SQL("""
SELECT id, state, {}
FROM events
WHERE id = %s
FOR UPDATE
""").format(sql.SQL(', ').join(
sql.Identifier(key) for key in sheet_columns
))
results = database.query(conn, built_query, ident)
old_row = results.fetchone()._asdict()
if old_row is None:
return 'Row {} not found'.format(ident), 404
assert old_row['id'] == ident
is_archive = old_row["sheet_name"] == app.archive_sheet
# archive events skip title and description munging
if not is_archive:
playlists = database.query(conn, """
SELECT playlist_id, name, tags
FROM playlists
WHERE show_in_description AND tags IS NOT NULL AND playlist_id IS NOT NULL
""")
# Filter for matching playlists for this video
playlists = [
playlist for playlist in playlists
if all(
tag.lower() in [t.lower() for t in old_row['tags']]
for tag in playlist.tags
)
]
description_lines.append('') # blank line before footer
description_lines.append(app.description_footer)
new_row['video_description'] += "\n\n" + "\n".join(description_lines)
# Validate youtube requirements on title and description
if len(new_row['video_title']) > MAX_TITLE_LENGTH:
return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400
if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH:
return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400
for char in ['<', '>']:
if char in new_row['video_title']:
return 'Title may not contain a {} character'.format(char), 400
if char in new_row['video_description']:
return 'Description may not contain a {} character'.format(char), 400
# Validate and convert video ranges and transitions.
num_ranges = len(new_row['video_ranges'])
if num_ranges == 0:
return 'Ranges must contain at least one range', 400
if len(new_row['video_transitions']) != num_ranges - 1:
return 'There must be exactly {} transitions for {} ranges'.format(
num_ranges - 1, num_ranges,
)
for start, end in new_row['video_ranges']:
if start > end:
return 'Range start must be less than end', 400
# We need these to be tuples not lists for psycopg2 to do the right thing,
# but since they come in as JSON they are currently lists.
new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']]
new_row['video_transitions'] = [
None if transition is None else tuple(transition)
for transition in new_row['video_transitions']
]
# Convert binary fields from base64 and do basic validation of contents
if new_row.get('thumbnail_image') is not None:
if new_row['thumbnail_mode'] != 'CUSTOM':
return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400
try:
new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image'])
except binascii.Error:
return 'thumbnail_image must be valid base64', 400
# check for PNG file header
if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'):
return 'thumbnail_image must be a PNG', 400
if new_row['state'] == 'MODIFIED':
if old_row['state'] not in ['DONE', 'MODIFIED']:
return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403
elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']:
return 'Video already published', 403
# check whether row has been changed in the sheet since editing has begun
changes = ''
for column in sheet_columns:
if column == "event_end":
# convert (dashed, value) to value
old_row[column] = old_row[column][1]
if isinstance(old_row[column], datetime.datetime):
old_row[column] = old_row[column].isoformat()
def normalize(value):
if isinstance(value, list):
return sorted(map(normalize, value))
if value is None:
return None
return value.lower().strip()
if normalize(new_row[column]) != normalize(old_row[column]):
changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column])
if changes and not override_changes:
return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409
if new_row['state'] == 'MODIFIED':
missing = []
# Modifying published rows is more limited, we ignore all other fields.
for column in set(modifiable_columns) & set(non_null_columns):
if new_row.get(column) is None:
missing.append(column)
if missing:
return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400
build_query = sql.SQL("""
UPDATE events
SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {}
WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED')
""").format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in set(modifiable_columns) & set(new_row)
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video changed state while we were updating - maybe it was reset?', 403
# Include headers and footers
new_row['video_title'] = app.title_header + new_row['video_title']
description_lines = []
if playlists:
# NOTE: If you change this format, you need to also change the regex that matches this
# on the GET handler.
description_lines.append(DESCRIPTION_PLAYLISTS_HEADER)
description_lines += [
"- {} [https://youtube.com/playlist?list={}]".format(playlist.name, playlist.playlist_id)
for playlist in playlists
]
description_lines.append('') # blank line before footer
description_lines.append(app.description_footer)
new_row['video_description'] += "\n\n" + "\n".join(description_lines)
# Validate youtube requirements on title and description
if len(new_row['video_title']) > MAX_TITLE_LENGTH:
return 'Title must be {} characters or less, including prefix'.format(MAX_TITLE_LENGTH), 400
if len(new_row['video_description']) > MAX_DESCRIPTION_LENGTH:
return 'Description must be {} characters or less, including footer'.format(MAX_DESCRIPTION_LENGTH), 400
for char in ['<', '>']:
if char in new_row['video_title']:
return 'Title may not contain a {} character'.format(char), 400
if char in new_row['video_description']:
return 'Description may not contain a {} character'.format(char), 400
# Validate and convert video ranges and transitions.
num_ranges = len(new_row['video_ranges'])
if num_ranges == 0:
return 'Ranges must contain at least one range', 400
if len(new_row['video_transitions']) != num_ranges - 1:
return 'There must be exactly {} transitions for {} ranges'.format(
num_ranges - 1, num_ranges,
)
for start, end in new_row['video_ranges']:
if start > end:
return 'Range start must be less than end', 400
# We need these to be tuples not lists for psycopg2 to do the right thing,
# but since they come in as JSON they are currently lists.
new_row['video_ranges'] = [tuple(range) for range in new_row['video_ranges']]
new_row['video_transitions'] = [
None if transition is None else tuple(transition)
for transition in new_row['video_transitions']
]
else:
# handle state columns
if new_row['state'] == 'EDITED':
# Convert binary fields from base64 and do basic validation of contents
if new_row.get('thumbnail_image') is not None:
if new_row['thumbnail_mode'] != 'CUSTOM':
return 'Can only upload custom image when thumbnail_mode = "CUSTOM"', 400
try:
new_row['thumbnail_image'] = base64.b64decode(new_row['thumbnail_image'])
except binascii.Error:
return 'thumbnail_image must be valid base64', 400
# check for PNG file header
if not new_row['thumbnail_image'].startswith(b'\x89PNG\r\n\x1a\n'):
return 'thumbnail_image must be a PNG', 400
if new_row['state'] == 'MODIFIED':
if old_row['state'] not in ['DONE', 'MODIFIED']:
return 'Video is in state {} and cannot be modified'.format(old_row['state']), 403
elif old_row['state'] not in ['UNEDITED', 'EDITED', 'CLAIMED']:
return 'Video already published', 403
# check whether row has been changed in the sheet since editing has begun
changes = ''
for column in sheet_columns:
if column == "event_end":
# convert (dashed, value) to value
old_row[column] = old_row[column][1]
if isinstance(old_row[column], datetime.datetime):
old_row[column] = old_row[column].isoformat()
def normalize(value):
if isinstance(value, list):
return sorted(map(normalize, value))
if value is None:
return None
return value.lower().strip()
if normalize(new_row[column]) != normalize(old_row[column]):
changes += '{}: {} => {}\n'.format(column, new_row[column], old_row[column])
if changes and not override_changes:
return 'Sheet columns have changed since editing has begun. Please review changes\n' + changes, 409
if new_row['state'] == 'MODIFIED':
missing = []
for column in non_null_columns:
if new_row[column] is None:
# Modifying published rows is more limited, we ignore all other fields.
for column in set(modifiable_columns) & set(non_null_columns):
if new_row.get(column) is None:
missing.append(column)
if missing:
return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400
if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive:
return 'Video title must not be blank', 400
elif new_row['state'] != 'UNEDITED':
return 'Invalid state {}'.format(new_row['state']), 400
new_row['uploader'] = None
new_row['error'] = None
new_row['editor'] = editor
new_row['edit_time'] = datetime.datetime.utcnow()
# actually update database
build_query = sql.SQL("""
UPDATE events
SET {}
WHERE id = %(id)s
AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')"""
).format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in new_row.keys() if column not in sheet_columns
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video likely already published', 403
return 'Fields {} must be non-null for modified video'.format(', '.join(missing)), 400
build_query = sql.SQL("""
UPDATE events
SET last_modified = NOW(), error = NULL, state = 'MODIFIED', {}
WHERE id = %(id)s AND state IN ('DONE', 'MODIFIED')
""").format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in set(modifiable_columns) & set(new_row)
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video changed state while we were updating - maybe it was reset?', 409
else:
# handle state columns
if new_row['state'] == 'EDITED':
missing = []
for column in non_null_columns:
if new_row[column] is None:
missing.append(column)
if missing:
return 'Fields {} must be non-null for video to be cut'.format(', '.join(missing)), 400
if len(new_row.get('video_title', '')) <= len(app.title_header) and not is_archive:
return 'Video title must not be blank', 400
elif new_row['state'] != 'UNEDITED':
return 'Invalid state {}'.format(new_row['state']), 400
new_row['uploader'] = None
new_row['error'] = None
new_row['editor'] = editor
new_row['edit_time'] = datetime.datetime.utcnow()
# actually update database
build_query = sql.SQL("""
UPDATE events
SET {}
WHERE id = %(id)s
AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')"""
).format(sql.SQL(", ").join(
sql.SQL("{} = {}").format(
sql.Identifier(column), database.get_column_placeholder(column),
) for column in new_row.keys() if column not in sheet_columns
))
result = database.query(conn, build_query, id=ident, **new_row)
if result.rowcount != 1:
return 'Video likely already published', 403
_write_audit_log(conn, ident, "update-row", editor, old_row, new_row)
logging.info('Row {} updated to state {}'.format(ident, new_row['state']))
return ''
@ -474,7 +484,7 @@ def update_row(ident, editor=None):
@request_stats
@authenticate
def manual_link(ident, editor=None):
"""Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the
"""Manually set a video_link if the state is 'UNEDITED' or 'DONE' and the
upload_location is 'manual' or 'youtube-manual'."""
link = flask.request.json['link']
upload_location = flask.request.json.get('upload_location', 'manual')
@ -490,28 +500,40 @@ def manual_link(ident, editor=None):
else:
return 'Upload location must be "manual" or "youtube-manual"', 400
conn = app.db_manager.get_conn()
results = database.query(conn, """
SELECT id, state
FROM events
WHERE id = %s""", ident)
old_row = results.fetchone()
if old_row is None:
return 'Row {} not found'.format(ident), 404
if old_row.state != 'UNEDITED':
return 'Invalid state {} for manual video link'.format(old_row.state), 403
now = datetime.datetime.utcnow()
# note we force thumbnail mode of manual uploads to always be NONE,
# since they might not be a video we actually control at all, or might not even be on youtube.
results = database.query(conn, """
UPDATE events
SET state='DONE', upload_location = %s, video_link = %s, video_id = %s,
editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE'
WHERE id = %s AND state = 'UNEDITED'
""", upload_location, link, video_id, editor, now, now, ident)
with app.db_manager.get_conn() as conn:
results = database.query(conn, """
SELECT id, state
FROM events
WHERE id = %s
FOR UPDATE
""", ident)
old_row = results.fetchone()
if old_row is None:
return 'Row {} not found'.format(ident), 404
if old_row.state != 'UNEDITED':
return 'Invalid state {} for manual video link'.format(old_row.state), 403
now = datetime.datetime.utcnow()
# note we force thumbnail mode of manual uploads to always be NONE,
# since they might not be a video we actually control at all, or might not even be on youtube.
result = database.query(conn, """
UPDATE events
SET state='DONE', upload_location = %s, video_link = %s, video_id = %s,
editor = %s, edit_time = %s, upload_time = %s, thumbnail_mode = 'NONE'
WHERE id = %s AND state = 'UNEDITED'
""", upload_location, link, video_id, editor, now, now, ident)
if result.rowcount != 1:
return 'Video changed state while we were updating - maybe it was reset?', 409
_write_audit_log(conn, ident, "manual-link", editor, new_row={
"state": "DONE",
"upload_location": upload_location,
"video_link": link,
"video_id": video_id,
"editor": editor,
"thumbnail_mode": None,
})
logging.info("Row {} video_link set to {}".format(ident, link))
return ''
return ''
@app.route('/thrimshim/reset/<ident>', methods=['POST'])
@request_stats
@ -523,23 +545,31 @@ def reset_row(ident, editor=None):
(state is UNEDITED, EDITED or CLAIMED)
"""
force = (flask.request.args.get('force', '').lower() == "true")
conn = app.db_manager.get_conn()
query = """
UPDATE events
SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL,
uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL,
last_modified = NULL
WHERE id = %s {}
""".format(
"" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')",
)
results = database.query(conn, query, ident)
if results.rowcount != 1:
return 'Row id = {} not found or not in cancellable state'.format(ident), 404
with app.db_manager.get_conn() as conn:
query = """
UPDATE events
SET state='UNEDITED', error = NULL, video_id = NULL, video_link = NULL,
uploader = NULL, editor = NULL, edit_time = NULL, upload_time = NULL,
last_modified = NULL
WHERE id = %s {}
""".format(
"" if force else "AND state IN ('UNEDITED', 'EDITED', 'CLAIMED')",
)
results = database.query(conn, query, ident)
if results.rowcount != 1:
return 'Row id = {} not found or not in cancellable state'.format(ident), 404
_write_audit_log(conn, ident, "reset-row", editor)
logging.info("Row {} reset to 'UNEDITED'".format(ident))
return ''
def _write_audit_log(conn, ident, api_action, editor, old_row=None, new_row=None):
database.query(conn, """
INSERT INTO events_edits_audit_log (id, api_action, editor, old_data, new_data)
VALUES (%(id)s, %(api_action)s, %(editor)s, %(old_row)s, %(new_row)s)
""", id=ident, api_action=api_action, editor=editor, old_row=to_json(old_row), new_row=to_json(new_row))
@app.route('/thrimshim/bus/<channel>')
@request_stats
def get_odometer(channel):

Loading…
Cancel
Save