From 0c43bcb714570dbe05a4535821c175151eda0498 Mon Sep 17 00:00:00 2001
From: HeNine <>
Date: Mon, 15 Nov 2021 13:36:20 +0100
Subject: [PATCH] get random line for tagging

Add the buscribe_line_inferred_speakers table and surface its speaker
names in the transcription SELECT in buscribe_data.sql, add
GET /professor/line/random, and accept line=random in the professor
page query string.

The random endpoint answers 404 on an empty transcription table instead
of letting randrange(0) raise ValueError.
---
 buscribe_data.sql                            | 17 ++++++++++--
 professor-api/professor_api/professor_api.py | 28 +++++++++++++++++++-
 professor/script.js                          | 11 ++++++--
 3 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/buscribe_data.sql b/buscribe_data.sql
index ee74263..80b61fb 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -62,6 +62,13 @@ CREATE TABLE buscribe_line_speakers
     PRIMARY KEY (line, speaker, verifier)
 );
 
+CREATE TABLE buscribe_line_inferred_speakers
+(
+    line    BIGINT NOT NULL REFERENCES buscribe_transcriptions,
+    speaker BIGINT NOT NULL REFERENCES buscribe_speakers,
+    PRIMARY KEY (line, speaker)
+);
+
 CREATE TABLE buscribe_verified_lines
 (
 --     id             BIGSERIAL PRIMARY KEY,
@@ -107,12 +114,18 @@ SELECT id,
        start_time,
        end_time,
        null                                       AS verifier,
-       null                                       AS names,
+       names,
        transcription_line,
        to_tsvector('english', transcription_line) AS transcription_line_ts,
        null                                       AS names_ts,
        transcription_json
-FROM buscribe_transcriptions;
+FROM buscribe_transcriptions
+         LEFT OUTER JOIN (
+    SELECT line, array_agg(name) AS names
+    FROM buscribe_line_inferred_speakers
+             INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
+    GROUP BY line
+) AS speakers ON id = speakers.line;
 
 ROLLBACK;
 
diff --git a/professor-api/professor_api/professor_api.py b/professor-api/professor_api/professor_api.py
index 3874027..f9da80c 100644
--- a/professor-api/professor_api/professor_api.py
+++ b/professor-api/professor_api/professor_api.py
@@ -1,6 +1,7 @@
 import re
 import urllib.parse
 from functools import wraps
+from random import randrange
 
 import flask
 import gevent
@@ -63,7 +64,32 @@ def get_line(line_id):
     if line is None:
         return "Line not found.", 404
     else:
-        return {"start_time": line.start_time.isoformat(),
+        return {"id": line.id,
+                "start_time": line.start_time.isoformat(),
+                "end_time": line.end_time.isoformat(),
+                "line_data": line.transcription_json}
+
+
+@app.route('/professor/line/random', methods=["GET"])
+def get_random_line():
+    """Return one randomly chosen row of buscribe_transcriptions, or 404 if there is none."""
+    db_conn = app.db_manager.get_conn()
+
+    n_lines = database.query(db_conn, "SELECT count(*) AS n_lines FROM buscribe_transcriptions;").fetchone().n_lines
+
+    # randrange(0) raises ValueError, so an empty table must short-circuit to 404 here.
+    if n_lines == 0:
+        return "Line not found.", 404
+
+    row = randrange(n_lines)
+
+    line = database.query(db_conn, "SELECT * FROM buscribe_transcriptions OFFSET %(row)s LIMIT 1;", row=row).fetchone()
+
+    if line is None:
+        return "Line not found.", 404
+    else:
+        return {"id": line.id,
+                "start_time": line.start_time.isoformat(),
                 "end_time": line.end_time.isoformat(),
                 "line_data": line.transcription_json}
 
diff --git a/professor/script.js b/professor/script.js
index 1179750..48e1c26 100644
--- a/professor/script.js
+++ b/professor/script.js
@@ -1,7 +1,13 @@
 function pageReady() {
 
     const params = new URLSearchParams(document.location.search.substring(1));
-    line_id = parseInt(params.get("line"), 10);
+    let line_id;
+    if (params.get("line") !== "random") {
+        line_id = parseInt(params.get("line"), 10);
+    } else {
+        line_id = "random"
+    }
+
 
     videojs("player", {
         // src: "test.m3u8",
@@ -74,7 +80,8 @@ function loggedIn(response) {
 }
 
 function fillLineInfo(line_json) {
-    // document.getElementById("original_transcription").innerText = line_json.line_data.text;
+    line_id = line_json.id
+    line = line_json
     document.getElementById("original_transcription").innerHTML = line_json.line_data.result
         .map(word => `${word.word}`).join(" ");
 