Speakers in query

multichannel
HeNine 3 years ago
parent 266c532863
commit 26554883f2

@ -94,15 +94,16 @@ def get_json():
"end_time": row.end_time.isoformat(), "end_time": row.end_time.isoformat(),
"end_bus_time": round_bus_time(row.start_time - app.bustime_start), "end_bus_time": round_bus_time(row.start_time - app.bustime_start),
"verifier": row.verifier, "verifier": row.verifier,
"text": row.transcription_line} for row in results]) "text": row.transcription_line if row.transcription_line is not None else ""} for row in results])
def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None): def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None):
query = "SELECT * FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s " query = "SELECT * FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s "
if ts_query is not None: if ts_query is not None:
query += "AND transcription_line_ts @@ (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery " \ query += "AND transcription_line_ts @@ " \
"ORDER BY ts_rank_cd(transcription_line_ts, (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery) DESC, " \ "(CASE WHEN websearch_to_tsquery(%(text_query)s)::text != '' THEN websearch_to_tsquery(%(text_query)s)::text || ':*' ELSE '' END)::tsquery " \
"ORDER BY ts_rank_cd(transcription_line_ts, (CASE WHEN websearch_to_tsquery(%(text_query)s)::text != '' THEN websearch_to_tsquery(%(text_query)s)::text || ':*' ELSE '' END)::tsquery) DESC, " \
"start_time" "start_time"
else: else:
query += "ORDER BY start_time" query += "ORDER BY start_time"

@ -38,7 +38,7 @@ CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time);
-- NOTE(review): each row below appears to hold the pre-change text followed by
-- the post-change text (side-by-side diff rendering) -- confirm against the repo.
-- Table of speaker names keyed by an auto-generated id. The only difference
-- between the two columns is the added CHECK, which rejects the empty string
-- as a speaker name (NOT NULL and UNIQUE are present in both).
CREATE TABLE buscribe_speakers CREATE TABLE buscribe_speakers
( (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
name text NOT NULL UNIQUE name text NOT NULL UNIQUE CHECK ( name != '' )
); );
CREATE TABLE buscribe_verifiers CREATE TABLE buscribe_verifiers
@ -50,7 +50,8 @@ CREATE TABLE buscribe_verifiers
-- For testing -- For testing
-- NOTE(review): rows appear to be old text followed by new text (side-by-side
-- diff) -- confirm. The first column inserts a single placeholder verifier;
-- the second column turns that into a two-row VALUES list by adding a second
-- example.com verifier.
INSERT INTO buscribe_verifiers(email, name) INSERT INTO buscribe_verifiers(email, name)
VALUES ('placeholder@example.com', 'Place Holder'); VALUES ('placeholder@example.com', 'Place Holder'),
('aguy@example.com', 'Arnold Guyana');
CREATE TABLE buscribe_line_speakers CREATE TABLE buscribe_line_speakers
( (
@ -75,24 +76,37 @@ CREATE INDEX buscribe_verified_lines_idx ON buscribe_verified_lines USING
GIN (setweight(to_tsvector('english', verified_line), 'C')); GIN (setweight(to_tsvector('english', verified_line), 'C'));
-- Redefinition of the buscribe_all_transcriptions view.
-- NOTE(review): each row appears to hold the pre-change text followed by the
-- post-change text (side-by-side diff); rows with a single statement exist only
-- in the second column -- confirm against the repo.
--
-- First column: UNION of every raw transcription (verifier = null) with every
-- verified line (inner-joined to its transcription), ordered by id.
-- Second column: UNION of
--   1. transcriptions that have a verified line and/or speaker attributions,
--      exposing the verifier, the aggregated speaker names and the verified
--      text (weighted 'C' in the tsvector), and
--   2. every raw transcription with null verifier and null names.
--
-- The whole script is wrapped in BEGIN ... ROLLBACK, so as written the
-- DROP/CREATE is rolled back rather than committed.
BEGIN; BEGIN;
-- No IF EXISTS here: this DROP requires the view to already exist.
DROP VIEW buscribe_all_transcriptions; DROP VIEW buscribe_all_transcriptions;
CREATE VIEW buscribe_all_transcriptions AS CREATE VIEW buscribe_all_transcriptions AS
SELECT "id", SELECT buscribe_transcriptions.id,
start_time, start_time,
end_time, end_time,
null AS verifier, coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
transcription_line, names,
to_tsvector('english', transcription_line) AS transcription_line_ts verified_line AS transcription_line,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts
FROM buscribe_transcriptions FROM buscribe_transcriptions
-- LEFT joins: verified_line (and therefore transcription_line and its tsvector)
-- is NULL for rows that only have speaker attributions.
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN (
-- One row per (line, verifier) with that verifier's speaker names as an array.
SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
GROUP BY line, verifier
-- Attach speakers from the same verifier as the verified line, or from any
-- verifier when the line has no verified text.
) AS speakers ON buscribe_transcriptions.id = speakers.line AND (
speakers.verifier = buscribe_verified_lines.verifier OR
buscribe_verified_lines.verifier IS NULL
)
-- Keep only rows that some verifier touched (verified text or speakers).
WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL
UNION UNION
SELECT "id", SELECT id,
start_time, start_time,
end_time, end_time,
verifier, null AS verifier,
verified_line AS transcription_line, null AS names,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts transcription_line,
FROM buscribe_verified_lines to_tsvector('english', transcription_line) AS transcription_line_ts
INNER JOIN buscribe_transcriptions ON (line = "id") FROM buscribe_transcriptions;
ORDER BY "id";
ROLLBACK; ROLLBACK;
Loading…
Cancel
Save