diff --git a/buscribe_data.sql b/buscribe_data.sql
index c97cf73..d99273a 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -1,13 +1,31 @@
-DROP TABLE buscribe_transcriptions;
+BEGIN TRANSACTION; -- Full reset: drops every buscribe table, referencing tables first.
+
+DROP TABLE IF EXISTS buscribe_verified_lines;
+DROP TABLE IF EXISTS buscribe_line_speakers;
+DROP TABLE IF EXISTS buscribe_speakers;
+DROP TABLE IF EXISTS buscribe_verifiers;
+DROP TABLE IF EXISTS buscribe_transcriptions;
+
+ROLLBACK; -- As written the drops above are discarded; end with COMMIT instead to apply them.
+
+BEGIN TRANSACTION; -- Data-only reset: empties every table and restarts its identity sequence.
+
+TRUNCATE buscribe_verified_lines RESTART IDENTITY CASCADE;
+TRUNCATE buscribe_line_speakers RESTART IDENTITY CASCADE;
+TRUNCATE buscribe_speakers RESTART IDENTITY CASCADE;
+TRUNCATE buscribe_verifiers RESTART IDENTITY CASCADE;
+TRUNCATE buscribe_transcriptions RESTART IDENTITY CASCADE;
+
+ROLLBACK; -- Likewise discarded as written; end with COMMIT instead to apply the truncation.
 
 CREATE TABLE buscribe_transcriptions
 (
     id BIGSERIAL PRIMARY KEY,
     start_time timestamp without time zone NOT NULL,
     end_time timestamp without time zone NOT NULL,
-    transcription_line text NOT NULL,
+    transcription_line text NOT NULL,
     line_speaker float[128],
-    transcription_json jsonb NOT NULL
+    transcription_json jsonb NOT NULL
 );
 
 CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
@@ -15,4 +33,39 @@ CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
 
 -- This might not actually be needed. Check once there is more data.
CREATE INDEX buscribe_start_time_idx ON buscribe_transcriptions (start_time);
-CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time);
\ No newline at end of file
+CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time);
+
+CREATE TABLE buscribe_speakers
+(
+    id BIGSERIAL PRIMARY KEY,
+    name text NOT NULL UNIQUE
+);
+
+CREATE TABLE buscribe_verifiers
+(
+    id SERIAL PRIMARY KEY,
+    email TEXT NOT NULL,
+    name TEXT NOT NULL
+);
+
+-- For testing
+INSERT INTO buscribe_verifiers(email, name)
+VALUES ('placeholder@example.com', 'Place Holder');
+
+CREATE TABLE buscribe_line_speakers
+(
+-- id BIGSERIAL PRIMARY KEY,
+    line BIGINT NOT NULL REFERENCES buscribe_transcriptions,
+    speaker BIGINT NOT NULL REFERENCES buscribe_speakers,
+    verifier INT NOT NULL REFERENCES buscribe_verifiers,
+    PRIMARY KEY (line, speaker, verifier)
+);
+
+CREATE TABLE buscribe_verified_lines
+(
+-- id BIGSERIAL PRIMARY KEY,
+    line BIGINT NOT NULL REFERENCES buscribe_transcriptions,
+    verified_line TEXT NOT NULL,
+    verifier INT REFERENCES buscribe_verifiers,
+    PRIMARY KEY (line, verifier)
+);
diff --git a/professor-api/professor_api/__init__.py b/professor-api/professor_api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/professor-api/professor_api/__main__.py b/professor-api/professor_api/__main__.py
new file mode 100644
index 0000000..2acd3ce
--- /dev/null
+++ b/professor-api/professor_api/__main__.py
@@ -0,0 +1,12 @@
+import logging
+import os
+
+import argh
+
+from professor_api.main import main
+
+LOG_FORMAT = "[%(asctime)s] %(levelname)8s %(name)s(%(module)s:%(lineno)d): %(message)s"
+
+level = os.environ.get('WUBLOADER_LOG_LEVEL', 'INFO').upper()
+logging.basicConfig(level=level, format=LOG_FORMAT)
+argh.dispatch_command(main)
diff --git a/professor-api/professor_api/main.py b/professor-api/professor_api/main.py
new file mode 100644
index 0000000..ee5b8a3
--- /dev/null
+++ b/professor-api/professor_api/main.py
@@ -0,0 +1,91 @@
+import logging
+import sys
+
+import argh
+import gevent
+import gevent.event
+from common import dateutil
+from common.database import DBManager
+from dateutil.parser import ParserError
+from gevent.pywsgi import WSGIServer
+
+from professor_api.professor_api import app
+
+
+def cors(app):
+    """Wrap a WSGI app so that every response carries permissive CORS headers.
+
+    Returns a new WSGI callable; the wrapped app is invoked unchanged apart
+    from the extra response headers.
+    """
+    HEADERS = [
+        ("Access-Control-Allow-Credentials", "false"),
+        ("Access-Control-Allow-Headers", "*"),
+        # PUT is not a CORS-safelisted method, so it has to be listed here or
+        # browsers reject cross-origin calls to PUT /professor/speaker.
+        ("Access-Control-Allow-Methods", "GET,HEAD,POST,PUT"),
+        ("Access-Control-Allow-Origin", "*"),
+        ("Access-Control-Max-Age", "86400"),
+    ]
+
+    def handle(environ, start_response):
+        def _start_response(status, headers, exc_info=None):
+            # Build a new list rather than extending the app's own list in place.
+            return start_response(status, list(headers) + HEADERS, exc_info)
+
+        return app(environ, _start_response)
+
+    return handle
+
+
+def servelet(server):
+    """Log the start-up and run server.serve_forever()."""
+    logging.info('Starting WSGI server.')
+    server.serve_forever()
+
+
+@argh.arg('--host',
+          help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).')
+@argh.arg('--port',
+          help='Port server will listen on. Default is 8005.')
+@argh.arg('--database',
+          help='Postgres connection string, which is either a space-separated list of key=value pairs, or a URI like: '
+               'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
+@argh.arg('--bustime-start',
+          help='The start time in UTC for the event, for UTC-Bustime conversion')
+def main(database="", host='0.0.0.0', port=8005, bustime_start=None):
+    """Serve the professor API until SIGTERM arrives or the server greenlet ends.
+
+    Exits with status 1 when --bustime-start is missing or cannot be parsed.
+    """
+    if bustime_start is None:
+        logging.error("Missing --bustime-start!")
+        sys.exit(1)
+
+    server = WSGIServer((host, port), cors(app))
+
+    try:
+        app.bustime_start = dateutil.parse(bustime_start)
+    except ParserError:
+        logging.error("Invalid --bustime-start!")
+        sys.exit(1)
+
+    app.db_manager = DBManager(dsn=database)
+
+    stopping = gevent.event.Event()
+
+    def stop():
+        logging.info("Shutting down")
+        stopping.set()
+
+    gevent.signal_handler(gevent.signal.SIGTERM, stop)
+
+    serve = gevent.spawn(servelet, server)
+
+    # Wait for either the stop signal or the server to oops out.
+    gevent.wait([serve, stopping], count=1)
+
+    server.stop()
+    serve.get()  # Wait for server to shut down and/or re-raise if serve_forever() errored
+
+    logging.info("Gracefully shut down")
diff --git a/professor-api/professor_api/professor_api.py b/professor-api/professor_api/professor_api.py
new file mode 100644
index 0000000..e7cc032
--- /dev/null
+++ b/professor-api/professor_api/professor_api.py
@@ -0,0 +1,123 @@
+import re
+
+import flask
+from common import database
+from flask import jsonify, request
+from psycopg2.extras import execute_values
+
+app = flask.Flask('buscribe')
+
+# Every write is attributed to this verifier id until real authentication exists.
+# NOTE(review): presumably the placeholder row inserted by buscribe_data.sql - confirm.
+VERIFIER_ID = 1
+
+
+@app.route('/professor/line/<int:line_id>', methods=["GET"])
+def get_line(line_id):
+    """Return the times and transcription JSON of one line, or 404 if it is unknown."""
+    db_conn = app.db_manager.get_conn()
+
+    line = database.query(db_conn, "SELECT * FROM buscribe_transcriptions WHERE id = %(id)s;", id=line_id).fetchone()
+
+    if line is None:
+        return "Line not found.", 404
+    else:
+        return {"start_time": line.start_time.isoformat(),
+                "end_time": line.end_time.isoformat(),
+                "line_data": line.transcription_json}
+
+
+@app.route('/professor/line/<int:line_id>', methods=["POST"])
+def update_line(line_id):
+    """Replace the verified speakers and/or the verified text of one line.
+
+    The JSON body may hold "speakers" (a list of speaker ids) and "transcription"
+    (a string); a key that is missing or of another type is ignored. Responds
+    204 on success and 400 when the body is not a JSON object.
+    """
+    payload = request.json
+    if not isinstance(payload, dict):
+        return "Invalid request body!", 400
+
+    db_conn = app.db_manager.get_conn()
+
+    speakers = payload.get("speakers")
+    if isinstance(speakers, list):
+        # Simpler than dealing with uniqueness
+        database.query(db_conn,
+                       "DELETE FROM buscribe_line_speakers WHERE line = %(line_id)s AND verifier = %(verifier)s;",
+                       line_id=line_id, verifier=VERIFIER_ID)
+        # Close the cursor after the batch insert instead of leaking it.
+        with db_conn.cursor() as cursor:
+            execute_values(cursor,
+                           "INSERT INTO buscribe_line_speakers(line, speaker, verifier) "
+                           "VALUES %s;",
+                           [(line_id, speaker, VERIFIER_ID) for speaker in speakers])
+
+    transcription = payload.get("transcription")
+    if isinstance(transcription, str):
+        # Keep only lower-case letters, whitespace and apostrophes.
+        verified_line = re.sub(r"[^a-z\s']", "", transcription.lower())
+
+        database.query(db_conn,
+                       "DELETE FROM buscribe_verified_lines WHERE line = %(line_id)s AND verifier = %(verifier)s;",
+                       line_id=line_id, verifier=VERIFIER_ID)
+        database.query(db_conn,
+                       "INSERT INTO buscribe_verified_lines(line, verified_line, verifier) "
+                       "VALUES (%(line)s, %(verified_line)s, %(verifier)s)",
+                       line=line_id, verified_line=verified_line, verifier=VERIFIER_ID)
+
+    return "", 204
+
+
+@app.route('/professor/speaker', methods=["GET"])
+def get_speakers():
+    """Return every speaker as a JSON list of {"id": ..., "name": ...} objects."""
+    db_conn = app.db_manager.get_conn()
+
+    # id has to be selected as well: the response below reads speaker.id.
+    speakers = database.query(db_conn, "SELECT id, name FROM buscribe_speakers;")
+
+    return jsonify([{"id": speaker.id, "name": speaker.name} for speaker in speakers])
+
+
+@app.route('/professor/speaker/<int:speaker_id>', methods=["GET"])
+def get_speaker(speaker_id):
+    """Return the name of one speaker as a JSON string, or 404 if it is unknown."""
+    db_conn = app.db_manager.get_conn()
+
+    speaker = database.query(db_conn, "SELECT name FROM buscribe_speakers WHERE id = %(id)s;", id=speaker_id).fetchone()
+
+    if speaker is None:
+        return "Speaker not found.", 404
+    else:
+        return jsonify(speaker.name)
+
+
+@app.route('/professor/speaker', methods=["PUT"])
+def new_speaker():
+    """Create a speaker from a JSON string body, or reuse the one with that name.
+
+    The name is lower-cased and stripped of everything except word characters,
+    whitespace and apostrophes. Responds 400 when the body is not a string or
+    nothing is left after that clean-up; otherwise the Content-Location header
+    points at the speaker.
+    """
+    name = request.json
+
+    if not isinstance(name, str):
+        return "Invalid name!", 400
+
+    name = name.lower()
+    name = re.sub(r"[^\w\s']", "", name)
+    if not name.strip():
+        return "Invalid name!", 400
+
+    db_conn = app.db_manager.get_conn()
+
+    speakers = database.query(db_conn, "INSERT INTO buscribe_speakers(name) "
+                                       "VALUES (%(name)s) "
+                                       "ON CONFLICT (name) DO UPDATE SET name=EXCLUDED.name "
+                                       "RETURNING id;", name=name)
+
+    return "", 200, {"Content-Location": f"/professor/speaker/{speakers.fetchone().id}"}