From 26554883f26017c4db73358fc01a4d6be4bef393 Mon Sep 17 00:00:00 2001 From: HeNine <> Date: Wed, 20 Oct 2021 17:56:24 +0200 Subject: [PATCH] Speakers in query --- buscribe-api/buscribeapi/buscribeapi.py | 7 +++-- buscribe_data.sql | 40 +++++++++++++++++-------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py index fb7c4af..9fddfbd 100644 --- a/buscribe-api/buscribeapi/buscribeapi.py +++ b/buscribe-api/buscribeapi/buscribeapi.py @@ -94,15 +94,16 @@ def get_json(): "end_time": row.end_time.isoformat(), "end_bus_time": round_bus_time(row.start_time - app.bustime_start), "verifier": row.verifier, - "text": row.transcription_line} for row in results]) + "text": row.transcription_line if row.transcription_line is not None else ""} for row in results]) def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None): query = "SELECT * FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s " if ts_query is not None: - query += "AND transcription_line_ts @@ (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery " \ - "ORDER BY ts_rank_cd(transcription_line_ts, (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery) DESC, " \ + query += "AND transcription_line_ts @@ " \ + "(CASE WHEN websearch_to_tsquery(%(text_query)s)::text != '' THEN websearch_to_tsquery(%(text_query)s)::text || ':*' ELSE '' END)::tsquery " \ + "ORDER BY ts_rank_cd(transcription_line_ts, (CASE WHEN websearch_to_tsquery(%(text_query)s)::text != '' THEN websearch_to_tsquery(%(text_query)s)::text || ':*' ELSE '' END)::tsquery) DESC, " \ "start_time" else: query += "ORDER BY start_time" diff --git a/buscribe_data.sql b/buscribe_data.sql index 308aaaa..f9fd943 100644 --- a/buscribe_data.sql +++ b/buscribe_data.sql @@ -38,7 +38,7 @@ CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time); CREATE TABLE buscribe_speakers ( id BIGSERIAL PRIMARY KEY, - name text NOT NULL UNIQUE + name text NOT NULL UNIQUE CHECK ( name != '' ) ); CREATE TABLE buscribe_verifiers @@ -50,7 +50,8 @@ CREATE TABLE buscribe_verifiers -- For testing INSERT INTO buscribe_verifiers(email, name) -VALUES ('placeholder@example.com', 'Place Holder'); +VALUES ('placeholder@example.com', 'Place Holder'), + ('aguy@example.com', 'Arnold Guyana'); CREATE TABLE buscribe_line_speakers ( @@ -75,24 +76,37 @@ CREATE INDEX buscribe_verified_lines_idx ON buscribe_verified_lines USING GIN (setweight(to_tsvector('english', verified_line), 'C')); BEGIN; + DROP VIEW buscribe_all_transcriptions; + CREATE VIEW buscribe_all_transcriptions AS -SELECT "id", +SELECT buscribe_transcriptions.id, start_time, end_time, - null AS verifier, - transcription_line, - to_tsvector('english', transcription_line) AS transcription_line_ts + coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier, + names, + verified_line AS transcription_line, + setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts FROM buscribe_transcriptions + LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line + LEFT OUTER JOIN ( + SELECT line, verifier, array_agg(name) AS names + FROM buscribe_line_speakers + INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id + GROUP BY line, verifier +) AS speakers ON buscribe_transcriptions.id = speakers.line AND ( + speakers.verifier = buscribe_verified_lines.verifier OR + buscribe_verified_lines.verifier IS NULL + ) +WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL UNION -SELECT "id", +SELECT id, start_time, end_time, - verifier, - verified_line AS transcription_line, - setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts -FROM buscribe_verified_lines - INNER JOIN buscribe_transcriptions ON (line = "id") -ORDER BY "id"; + null AS verifier, + null AS names, + transcription_line, + to_tsvector('english', transcription_line) AS transcription_line_ts +FROM buscribe_transcriptions; ROLLBACK; \ No newline at end of file