From 8a6f32975a406c9f55b627933048fbc3432efa71 Mon Sep 17 00:00:00 2001
From: HeNine <>
Date: Tue, 16 Nov 2021 13:59:31 +0100
Subject: [PATCH] search optimization
---
buscribe-api/buscribeapi/buscribeapi.py | 23 ++++++++++++
buscribe_data.sql | 47 ++++++++++++++++++++++++-
professor/script.js | 1 -
3 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py
index 4d38718..517719b 100644
--- a/buscribe-api/buscribeapi/buscribeapi.py
+++ b/buscribe-api/buscribeapi/buscribeapi.py
@@ -139,6 +139,29 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query += ";"
+ query = f"""
+ WITH q AS (
+ SELECT convert_query(${"%(text_query)s" if ts_query is not None else ""})
+ )
+ (SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
+ 'StartSel='''', StopSel=') AS highlighted_text
+ FROM buscribe_all_transcriptions2
+ WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
+ ${"AND verified_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
+ ORDER BY ${"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
+ "coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
+ start_time)
+ UNION
+ (SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
+ 'StartSel='''', StopSel=') AS highlighted_text
+ FROM buscribe_all_transcriptions2
+ WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
+ ${"AND machine_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
+ ORDER BY ${"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
+ "coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
+ start_time)
+ """
+
return database.query(db_conn, query,
start_time=start_time if start_time is not None else '-infinity',
end_time=end_time if end_time is not None else 'infinity',
diff --git a/buscribe_data.sql b/buscribe_data.sql
index 80b61fb..aae2f6f 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -129,6 +129,51 @@ FROM buscribe_transcriptions
ROLLBACK;
+CREATE VIEW buscribe_all_transcriptions2 AS -- Verified and machine transcription lines in one view, with separate tsvector columns for each text source.
+SELECT buscribe_transcriptions.id, -- Branch 1: lines that have a verified text and/or verifier-attributed speakers.
+ start_time,
+ end_time,
+ coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier, -- verifier of the line, else verifier of the speaker attribution
+ names,
+ coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line, -- verified text when present, otherwise machine text
+ to_tsvector('english', buscribe_transcriptions.transcription_line) AS machine_line_ts, -- always built from the machine text
+ setweight(to_tsvector('english', verified_line), 'C') AS verified_line_ts, -- built from the verified text only
+ coalesce(setweight(to_tsvector('english', verified_line), 'C'),
+ to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts, -- verified vector, falling back to the machine vector
+ setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts, -- no explicit text search configuration is passed here, unlike the 'english' calls above
+ null AS transcription_json -- JSON is not exposed for this branch
+FROM buscribe_transcriptions
+ LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
+ LEFT OUTER JOIN (
+ SELECT line, verifier, array_agg(name) AS names -- speaker names aggregated per (line, verifier)
+ FROM buscribe_line_speakers
+ INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
+ GROUP BY line, verifier
+) AS speakers ON buscribe_transcriptions.id = speakers.line AND ( -- pair speakers with the verified line of the same verifier,
+ speakers.verifier = buscribe_verified_lines.verifier OR
+ buscribe_verified_lines.verifier IS NULL -- or accept any speaker attribution when the line has no verified text
+ )
+WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL -- drop lines with neither a verified text nor attributed speakers
+UNION -- UNION (not UNION ALL): duplicate rows are removed
+SELECT id, -- Branch 2: every machine transcription line, with inferred speakers.
+ start_time,
+ end_time,
+ null AS verifier, -- machine rows have no verifier
+ names,
+ transcription_line,
+ to_tsvector('english', transcription_line) AS machine_line_ts,
+ null AS verified_line_ts, -- no verified text in this branch
+ to_tsvector('english', transcription_line) AS transcription_line_ts,
+ null AS names_ts, -- inferred speaker names get no search vector
+ transcription_json
+FROM buscribe_transcriptions
+ LEFT OUTER JOIN (
+ SELECT line, array_agg(name) AS names -- inferred speaker names aggregated per line
+ FROM buscribe_line_inferred_speakers
+ INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
+ GROUP BY line
+) AS speakers ON id = speakers.line;
+
-- Convert last lexeme in a query to prefix query.
CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS
$$
@@ -137,4 +182,4 @@ DECLARE
BEGIN
RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery;
END;
-$$ LANGUAGE plpgsql;
\ No newline at end of file
+$$ LANGUAGE plpgsql;
diff --git a/professor/script.js b/professor/script.js
index 216cc2c..8a50619 100644
--- a/professor/script.js
+++ b/professor/script.js
@@ -43,7 +43,6 @@ function pageReady() {
hotkeys('ctrl+enter', function (event, handler){
- console.log(event);
document.getElementById("submit_button").click();
});