From 8a6f32975a406c9f55b627933048fbc3432efa71 Mon Sep 17 00:00:00 2001
From: HeNine <>
Date: Tue, 16 Nov 2021 13:59:31 +0100
Subject: [PATCH] search optimization

---
 buscribe-api/buscribeapi/buscribeapi.py | 30 ++++++++++++
 buscribe_data.sql                       | 47 ++++++++++++++++++++++++-
 professor/script.js                     |  1 -
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py
index 4d38718..517719b 100644
--- a/buscribe-api/buscribeapi/buscribeapi.py
+++ b/buscribe-api/buscribeapi/buscribeapi.py
@@ -139,6 +139,36 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
 
     query += ";"
 
+    # Search verified lines and machine lines separately and UNION the results.
+    # Replacement fields are plain f-string {...}; the JavaScript-style "${...}"
+    # would leave a literal "$" in the SQL text. Without a search term an empty
+    # string is passed so convert_query() still receives its one argument.
+    # NOTE(review): this assignment discards the `query` built above -- confirm.
+    # NOTE(review): ORDER BY inside a UNION branch is not guaranteed to survive
+    # the UNION; StartSel/StopSel below are empty -- confirm both are intended.
+    query = f"""
+        WITH q AS (
+            SELECT convert_query({"%(text_query)s" if ts_query is not None else "''"})
+        )
+        (SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
+                               'StartSel='''', StopSel=') AS highlighted_text
+        FROM buscribe_all_transcriptions2
+        WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
+        {"AND verified_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
+        ORDER BY {"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
+                  "coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
+            start_time)
+        UNION
+        (SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
+                               'StartSel='''', StopSel=') AS highlighted_text
+        FROM buscribe_all_transcriptions2
+        WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
+        {"AND machine_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
+        ORDER BY {"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
+                  "coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
+            start_time)
+        """
+
     return database.query(db_conn, query,
                           start_time=start_time if start_time is not None else '-infinity',
                           end_time=end_time if end_time is not None else 'infinity',
diff --git a/buscribe_data.sql b/buscribe_data.sql
index
80b61fb..aae2f6f 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -129,6 +129,51 @@ FROM buscribe_transcriptions
 
 ROLLBACK;
 
+CREATE VIEW buscribe_all_transcriptions2 AS -- verifier-attributed rows UNION one row per machine transcription
+SELECT buscribe_transcriptions.id, -- branch 1: rows that carry a verifier (see WHERE below)
+       start_time,
+       end_time,
+       coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier, -- verifier of the line text, else of the speaker tags
+       names,
+       coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line, -- verified text preferred over machine text
+       to_tsvector('english', buscribe_transcriptions.transcription_line) AS machine_line_ts, -- search vector of the machine text only
+       setweight(to_tsvector('english', verified_line), 'C') AS verified_line_ts, -- search vector of the verified text only, weight C
+       coalesce(setweight(to_tsvector('english', verified_line), 'C'),
+                to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts, -- verified vector, falling back to the machine vector
+       setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts, -- NOTE(review): no explicit config here, unlike the 'english' calls above
+       null AS transcription_json -- JSON is not exposed in this branch
+FROM buscribe_transcriptions
+         LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
+         LEFT OUTER JOIN (
+    SELECT line, verifier, array_agg(name) AS names -- speaker names aggregated per (line, verifier)
+    FROM buscribe_line_speakers
+             INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
+    GROUP BY line, verifier
+) AS speakers ON buscribe_transcriptions.id = speakers.line AND (
+            speakers.verifier = buscribe_verified_lines.verifier OR -- same verifier as the verified text ...
+            buscribe_verified_lines.verifier IS NULL -- ... or there is no verified text for this row
+    )
+WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL -- drop rows with neither verified text nor verified speakers
+UNION
+SELECT id, -- branch 2: every row of buscribe_transcriptions (no WHERE), verifier is null
+       start_time,
+       end_time,
+       null AS verifier,
+       names,
+       transcription_line,
+       to_tsvector('english', transcription_line) AS machine_line_ts,
+       null AS verified_line_ts, -- no verified text in this branch
+       to_tsvector('english', transcription_line) AS transcription_line_ts,
+       null AS names_ts, -- names are returned but not made searchable in this branch
+       transcription_json
+FROM buscribe_transcriptions
+         LEFT OUTER JOIN (
+    SELECT line, array_agg(name) AS names -- inferred speaker names aggregated per line
+    FROM buscribe_line_inferred_speakers
+             INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
+    GROUP BY line
+) AS speakers ON id = speakers.line;
+
 -- Convert last lexeme in a query to prefix query.
 CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS
 $$
@@ -137,4 +182,4 @@ DECLARE
 BEGIN
     RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery;
 END;
-$$ LANGUAGE plpgsql;
\ No newline at end of file
+$$ LANGUAGE plpgsql;
diff --git a/professor/script.js b/professor/script.js
index 216cc2c..8a50619 100644
--- a/professor/script.js
+++ b/professor/script.js
@@ -43,7 +43,6 @@ function pageReady() {
 
 
     hotkeys('ctrl+enter', function (event, handler){
-        console.log(event);
         document.getElementById("submit_button").click();
     });
 