search optimization

trunk
HeNine 3 years ago
parent 8f6fc2b722
commit 8a6f32975a

@ -139,6 +139,29 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query += ";"
query = f"""
WITH q AS (
SELECT convert_query(${"%(text_query)s" if ts_query is not None else ""})
)
(SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text
FROM buscribe_all_transcriptions2
WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
${"AND verified_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
ORDER BY ${"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
"coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
start_time)
UNION
(SELECT *, ts_headline(transcription_line, (SELECT * FROM q),
'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text
FROM buscribe_all_transcriptions2
WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s
${"AND machine_line_ts @@ (SELECT * FROM q)" if ts_query is not None else ""}
ORDER BY ${"ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) ||" +
"coalesce(names_ts, ''::tsvector), (SELECT * FROM q)) DESC," if ts_query is not None else ""}
start_time)
"""
return database.query(db_conn, query,
start_time=start_time if start_time is not None else '-infinity',
end_time=end_time if end_time is not None else 'infinity',

@ -129,6 +129,51 @@ FROM buscribe_transcriptions
ROLLBACK;
CREATE VIEW buscribe_all_transcriptions2 AS
SELECT buscribe_transcriptions.id,
start_time,
end_time,
coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
names,
coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line,
to_tsvector('english', buscribe_transcriptions.transcription_line) AS machine_line_ts,
setweight(to_tsvector('english', verified_line), 'C') AS verified_line_ts,
coalesce(setweight(to_tsvector('english', verified_line), 'C'),
to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts,
setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
null AS transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN (
SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
GROUP BY line, verifier
) AS speakers ON buscribe_transcriptions.id = speakers.line AND (
speakers.verifier = buscribe_verified_lines.verifier OR
buscribe_verified_lines.verifier IS NULL
)
WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL
UNION
SELECT id,
start_time,
end_time,
null AS verifier,
names,
transcription_line,
to_tsvector('english', transcription_line) AS machine_line_ts,
null AS verified_line_ts,
to_tsvector('english', transcription_line) AS transcription_line_ts,
null AS names_ts,
transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN (
SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line
) AS speakers ON id = speakers.line;
-- Convert last lexeme in a query to prefix query.
CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS
$$
@ -137,4 +182,4 @@ DECLARE
BEGIN
RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery;
END;
$$ LANGUAGE plpgsql;
$$ LANGUAGE plpgsql;

@ -43,7 +43,6 @@ function pageReady() {
hotkeys('ctrl+enter', function (event, handler){
console.log(event);
document.getElementById("submit_button").click();
});

Loading…
Cancel
Save