Tagging API

borken-flask
HeNine 3 years ago
parent 2461924d9f
commit 6e81bbf629

@ -1,13 +1,31 @@
-- Manual reset helpers. Both transactions below end in ROLLBACK, so their
-- statements are undone when the file is run as written.
-- NOTE(review): presumably ROLLBACK is changed to COMMIT by hand when a reset
-- is really wanted -- confirm with the author.

-- Drop all buscribe tables. Tables holding foreign keys are dropped before the
-- tables they reference.
BEGIN TRANSACTION;
DROP TABLE IF EXISTS buscribe_verified_lines;
DROP TABLE IF EXISTS buscribe_line_speakers;
DROP TABLE IF EXISTS buscribe_speakers;
DROP TABLE IF EXISTS buscribe_verifiers;
DROP TABLE IF EXISTS buscribe_transcriptions;
ROLLBACK;

-- Empty all buscribe tables and reset their identity sequences.
BEGIN TRANSACTION;
TRUNCATE buscribe_verified_lines RESTART IDENTITY CASCADE;
TRUNCATE buscribe_line_speakers RESTART IDENTITY CASCADE;
TRUNCATE buscribe_speakers RESTART IDENTITY CASCADE;
TRUNCATE buscribe_verifiers RESTART IDENTITY CASCADE;
TRUNCATE buscribe_transcriptions RESTART IDENTITY CASCADE;
ROLLBACK;
-- One row per transcribed line.
CREATE TABLE buscribe_transcriptions
(
    id                 BIGSERIAL PRIMARY KEY,
    -- NOTE(review): naive timestamps; presumably UTC -- confirm against the writer.
    start_time         timestamp without time zone NOT NULL,
    end_time           timestamp without time zone NOT NULL,
    transcription_line text NOT NULL,
    -- NOTE(review): PostgreSQL does not enforce a declared array length, so
    -- the [128] here is documentation only.
    line_speaker       float[128],
    transcription_json jsonb NOT NULL
);
CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
@ -15,4 +33,39 @@ CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
-- This might not actually be needed. Check once there is more data.
CREATE INDEX buscribe_start_time_idx ON buscribe_transcriptions (start_time);
CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time);
-- Known speakers; each name may appear only once.
CREATE TABLE buscribe_speakers
(
    id   BIGSERIAL,
    name TEXT NOT NULL,
    PRIMARY KEY (id),
    UNIQUE (name)
);
-- People who verify transcription lines and speaker tags.
CREATE TABLE buscribe_verifiers
(
    id    SERIAL,
    email TEXT NOT NULL,
    name  TEXT NOT NULL,
    PRIMARY KEY (id)
);
-- For testing: seed a single placeholder verifier.
INSERT INTO buscribe_verifiers (email, name)
VALUES
    ('placeholder@example.com', 'Place Holder');
-- Speaker tags: which verifier attributed which speaker to which line.
-- There is no surrogate id; the (line, speaker, verifier) triple is the key.
CREATE TABLE buscribe_line_speakers
(
    line     BIGINT NOT NULL REFERENCES buscribe_transcriptions (id),
    speaker  BIGINT NOT NULL REFERENCES buscribe_speakers (id),
    verifier INT    NOT NULL REFERENCES buscribe_verifiers (id),
    PRIMARY KEY (line, speaker, verifier)
);
-- Corrected line text, at most one per (line, verifier) pair.
-- There is no surrogate id; the (line, verifier) pair is the key.
CREATE TABLE buscribe_verified_lines
(
    line          BIGINT NOT NULL REFERENCES buscribe_transcriptions,
    verified_line TEXT   NOT NULL,
    -- Part of the primary key, so it can never be NULL; stated explicitly to
    -- match buscribe_line_speakers.
    verifier      INT    NOT NULL REFERENCES buscribe_verifiers,
    PRIMARY KEY (line, verifier)
);

@ -0,0 +1,12 @@
import logging
import os
import argh
from professor_api.main import main
# Log line layout: timestamp, padded level, logger name, module:line, message.
LOG_FORMAT = "[%(asctime)s] %(levelname)8s %(name)s(%(module)s:%(lineno)d): %(message)s"

# The level is read from the environment (case-insensitive); INFO when unset.
level = os.getenv('WUBLOADER_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(format=LOG_FORMAT, level=level)

# Hand control to argh, which parses the command line and calls main.
argh.dispatch_command(main)

@ -0,0 +1,78 @@
import logging
import argh
import gevent
from common import dateutil
from common.database import DBManager
from dateutil.parser import ParserError
from gevent.pywsgi import WSGIServer
from professor_api.professor_api import app
def cors(app):
    """Wrap a WSGI application so that every response carries CORS headers.

    :param app: the WSGI callable to wrap.
    :returns: a WSGI callable that delegates to ``app`` but appends the CORS
        headers to whatever header list ``app`` passes to ``start_response``.
    """
    HEADERS = [
        ("Access-Control-Allow-Credentials", "false"),
        ("Access-Control-Allow-Headers", "*"),
        ("Access-Control-Allow-Methods", "GET,HEAD"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Max-Age", "86400"),
    ]

    def cors_app(environ, start_response):
        def start_with_cors(status, headers, exc_info=None):
            # Extends the application's own header list in place.
            headers += HEADERS
            return start_response(status, headers, exc_info)

        return app(environ, start_with_cors)

    return cors_app
def servelet(server):
    """Announce startup in the log, then run ``server.serve_forever()``.

    :param server: any object exposing ``serve_forever()``.
    """
    logging.info('Starting WSGI server.')
    server.serve_forever()
@argh.arg('--host',
          help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).')
@argh.arg('--port',
          help='Port server will listen on. Default is 8005.')
@argh.arg('--database',
          help='Postgres connection string, which is either a space-separated list of key=value pairs, or a URI like: '
               'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start',
          help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8005, bustime_start=None):
    """Configure the Flask app and serve it over WSGI until SIGTERM or a server error.

    :param database: Postgres connection string handed to ``DBManager``.
    :param host: address the WSGI server listens on.
    :param port: port the WSGI server listens on.
    :param bustime_start: event start time; required, parsed with
        ``common.dateutil.parse``.
    :raises SystemExit: with status 1 when ``bustime_start`` is missing or
        cannot be parsed.
    """
    if bustime_start is None:
        logging.error("Missing --bustime-start!")
        # raise SystemExit instead of calling exit(): exit() is a helper
        # installed by the `site` module and is not guaranteed to exist.
        raise SystemExit(1)

    server = WSGIServer((host, port), cors(app))

    try:
        app.bustime_start = dateutil.parse(bustime_start)
    except ParserError:
        logging.error("Invalid --bustime-start!")
        raise SystemExit(1)

    app.db_manager = DBManager(dsn=database)

    stopping = gevent.event.Event()

    def stop():
        logging.info("Shutting down")
        stopping.set()

    gevent.signal_handler(gevent.signal.SIGTERM, stop)

    serve = gevent.spawn(servelet, server)

    # Wait for either the stop signal or the server to oops out.
    gevent.wait([serve, stopping], count=1)

    server.stop()
    serve.get()  # Wait for server to shut down and/or re-raise if serve_forever() errored
    logging.info("Gracefully shut down")

@ -0,0 +1,91 @@
import re
import flask
from common import database
from flask import jsonify, request
from psycopg2.extras import execute_values
# Flask application object that the routes below are registered on. The
# handlers read `app.db_manager`, which is not assigned in this module.
# NOTE(review): presumably attached by the server entry point before serving -- confirm.
app = flask.Flask('buscribe')
@app.route('/professor/line/<int:line_id>', methods=["GET"])
def get_line(line_id):
    """Return the timing and JSON payload of one transcription line.

    :param line_id: ``buscribe_transcriptions.id`` taken from the URL.
    :returns: a dict with ``start_time`` and ``end_time`` (ISO 8601 strings)
        and ``line_data``, or a 404 response when no such line exists.
    """
    db_conn = app.db_manager.get_conn()

    # Select only the columns used below rather than every column of the row.
    line = database.query(db_conn,
                          "SELECT start_time, end_time, transcription_json "
                          "FROM buscribe_transcriptions WHERE id = %(id)s;",
                          id=line_id).fetchone()

    if line is None:
        return "Line not found.", 404

    return {"start_time": line.start_time.isoformat(),
            "end_time": line.end_time.isoformat(),
            "line_data": line.transcription_json}
@app.route('/professor/line/<int:line_id>', methods=["POST"])
def update_line(line_id):
    """Store speaker tags and/or a corrected transcription for one line.

    The JSON body is an object with two optional keys:

    * ``speakers`` -- list of speaker ids; replaces the verifier's existing
      tags for this line.
    * ``transcription`` -- corrected text; lower-cased and stripped of every
      character other than ``a-z``, whitespace and apostrophes before it
      replaces the verifier's existing correction.

    Keys of the wrong type are ignored.

    :param line_id: ``buscribe_transcriptions.id`` taken from the URL.
    :returns: an empty 204 response, or 400 when the body is not a JSON object.
    """
    payload = request.json
    if not isinstance(payload, dict):
        # Without this check a JSON list, string or null body fails with a
        # TypeError in the lookups below.
        return "Invalid request body!", 400

    db_conn = app.db_manager.get_conn()

    # Every write is attributed to verifier 1.
    # NOTE(review): presumably the placeholder verifier until real
    # authentication exists -- confirm.
    verifier = 1

    speakers = payload.get("speakers")
    if isinstance(speakers, list):
        # Simpler than dealing with uniqueness
        database.query(db_conn,
                       "DELETE FROM buscribe_line_speakers WHERE line = %(line_id)s AND verifier = %(verifier)s;",
                       line_id=line_id, verifier=verifier)
        execute_values(db_conn.cursor(),
                       "INSERT INTO buscribe_line_speakers(line, speaker, verifier) "
                       "VALUES %s;",
                       [(line_id, speaker, verifier) for speaker in speakers])

    transcription = payload.get("transcription")
    if isinstance(transcription, str):
        # The previous pattern r"[^[a-z]\s']]" was malformed: it only matched a
        # five-character sequence ending in "']]", so punctuation was never removed.
        verified_line = re.sub(r"[^a-z\s']", "", transcription.lower())

        database.query(db_conn,
                       "DELETE FROM buscribe_verified_lines WHERE line = %(line_id)s AND verifier = %(verifier)s;",
                       line_id=line_id, verifier=verifier)
        database.query(db_conn,
                       "INSERT INTO buscribe_verified_lines(line, verified_line, verifier) "
                       "VALUES (%(line)s, %(verified_line)s, %(verifier)s)",
                       line=line_id, verified_line=verified_line, verifier=verifier)

    return "", 204
@app.route('/professor/speaker', methods=["GET"])
def get_speakers():
    """Return every known speaker as a JSON list of ``{"id", "name"}`` objects."""
    db_conn = app.db_manager.get_conn()

    # id must be selected as well: the response below reads speaker.id, which
    # a name-only query does not provide.
    speakers = database.query(db_conn, "SELECT id, name FROM buscribe_speakers;")

    return jsonify([{"id": speaker.id, "name": speaker.name} for speaker in speakers])
@app.route('/professor/speaker/<int:speaker_id>', methods=["GET"])
def get_speaker(speaker_id):
    """Return the name of one speaker as a JSON string.

    :param speaker_id: ``buscribe_speakers.id`` taken from the URL.
    :returns: the JSON-encoded name, or a 404 response when the id is unknown.
    """
    conn = app.db_manager.get_conn()
    cursor = database.query(conn, "SELECT name FROM buscribe_speakers WHERE id = %(id)s;", id=speaker_id)
    row = cursor.fetchone()

    if row is not None:
        return jsonify(row.name)
    return "Speaker not found.", 404
@app.route('/professor/speaker', methods=["PUT"])
def new_speaker():
    """Create a speaker, or look up the existing one, from a JSON string body.

    The name is lower-cased and stripped of everything except word
    characters, whitespace and apostrophes before it is stored.

    :returns: an empty 200 response whose ``Content-Location`` header points at
        the speaker, or 400 when the body is not a JSON string.
    """
    raw_name = request.json
    if not isinstance(raw_name, str):
        return "Invalid name!", 400

    cleaned = re.sub(r"[^\w\s']", "", raw_name.lower())

    conn = app.db_manager.get_conn()
    # The no-op DO UPDATE makes RETURNING yield the id for an existing name too.
    cursor = database.query(conn, "INSERT INTO buscribe_speakers(name) "
                                  "VALUES (%(name)s) "
                                  "ON CONFLICT (name) DO UPDATE SET name=EXCLUDED.name "
                                  "RETURNING id;", name=cleaned)

    return "", 200, {"Content-Location": f"/professor/speaker/{cursor.fetchone().id}"}
Loading…
Cancel
Save