Add hand-transcribed lines to search

Add partial word search
multichannel
HeNine 3 years ago
parent 6e31b1df2f
commit 6c9873ac60

@ -88,20 +88,24 @@ def get_json():
results = fetch_lines(db_conn, start_time, end_time, query, limit, offset)
return jsonify([{"start_time": row.start_time.isoformat(),
return jsonify([{"id": row.id,
"start_time": row.start_time.isoformat(),
"start_bus_time": round_bus_time(row.start_time - app.bustime_start),
"end_time": row.end_time.isoformat(),
"end_bus_time": round_bus_time(row.start_time - app.bustime_start),
"verifier": row.verifier,
"text": row.transcription_line} for row in results])
def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None):
query = "SELECT * FROM buscribe_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s "
query = "SELECT * FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s "
if ts_query is not None:
query += "AND to_tsvector(transcription_line) @@ websearch_to_tsquery(%(text_query)s) " \
"ORDER BY ts_rank_cd(to_tsvector(transcription_line), websearch_to_tsquery(%(text_query)s)) DESC, " \
query += "AND transcription_line_ts @@ (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery " \
"ORDER BY ts_rank_cd(transcription_line_ts, (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery) DESC, " \
"start_time"
else:
query += "ORDER BY start_time"
if limit is not None:
query += "LIMIT %(limit)s"
@ -111,8 +115,6 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query += ";"
print(query)
return database.query(db_conn, query,
start_time=start_time if start_time is not None else '-infinity',
end_time=end_time if end_time is not None else 'infinity',

@ -47,7 +47,7 @@ def servelet(server):
'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start',
help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8005, bustime_start=None):
def main(database="", host='0.0.0.0', port=8010, bustime_start=None):
if bustime_start is None:
logging.error("Missing --bustime-start!")
exit(1)

@ -42,3 +42,7 @@
grid-row: span 2;
}
}
/* Give result lines that also carry the "verified" class a grey background
   so they can be told apart from the other lines. */
.line.verified {
background: #555;
}

@ -19,7 +19,7 @@ function query(text, start_time, end_time) {
query_string += `&query=${text}`
}
fetch(`http://localhost:8005/buscribe/json?${query_string}`)
fetch(`http://localhost:8010/buscribe/json?${query_string}`)
.then(response => response.json())
// .then(response => console.log(response.error()))
.then(fillResults)
@ -42,6 +42,9 @@ function fillResults(results) {
const line_div = document.createElement("div");
line_div.classList.add("line");
if (line.verifier) {
line_div.classList.add("verified");
}
line_div.innerHTML = `
<div class="line_start_bus_time">${line.start_bus_time}</div>

@ -69,3 +69,30 @@ CREATE TABLE buscribe_verified_lines
verifier text REFERENCES buscribe_verifiers,
PRIMARY KEY (line, verifier)
);
-- Full-text (GIN) index over the verified lines.
-- Indexed with C weight (0.2 vs default 0.1)
-- The indexed expression is the same one the buscribe_all_transcriptions view
-- exposes as transcription_line_ts for verified lines; keep the two in sync.
CREATE INDEX buscribe_verified_lines_idx ON buscribe_verified_lines USING
GIN (setweight(to_tsvector('english', verified_line), 'C'));
-- Rebuild buscribe_all_transcriptions: one view combining the rows of
-- buscribe_transcriptions with the rows of buscribe_verified_lines, each with a
-- text-search vector in transcription_line_ts.
-- NOTE(review): the transaction below ends with ROLLBACK, so as written neither
-- the DROP nor the CREATE is persisted -- confirm whether COMMIT was intended.
BEGIN;
-- NOTE(review): a plain DROP VIEW raises an error when the view does not exist
-- yet -- confirm this script is only run against a schema that already has it.
DROP VIEW buscribe_all_transcriptions;
CREATE VIEW buscribe_all_transcriptions AS
-- Branch 1: rows from buscribe_transcriptions; verifier is NULL and the vector
-- carries no explicit weight.
SELECT "id",
start_time,
end_time,
null AS verifier,
transcription_line,
to_tsvector('english', transcription_line) AS transcription_line_ts
FROM buscribe_transcriptions
-- UNION (not UNION ALL): duplicate rows across the two branches are removed.
UNION
-- Branch 2: verified lines, exposed under the same column names. The id and
-- timestamps come from the join with buscribe_transcriptions; the vector is
-- tagged with weight C.
SELECT "id",
start_time,
end_time,
verifier,
verified_line AS transcription_line,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts
FROM buscribe_verified_lines
INNER JOIN buscribe_transcriptions ON (line = "id")
-- The ordering applies to the combined result of both branches.
ORDER BY "id";
ROLLBACK;
Loading…
Cancel
Save