Add hand-transcribed lines to search

Add partial word search
multichannel
HeNine 3 years ago
parent 6e31b1df2f
commit 6c9873ac60

@ -88,20 +88,24 @@ def get_json():
results = fetch_lines(db_conn, start_time, end_time, query, limit, offset) results = fetch_lines(db_conn, start_time, end_time, query, limit, offset)
return jsonify([{"start_time": row.start_time.isoformat(), return jsonify([{"id": row.id,
"start_time": row.start_time.isoformat(),
"start_bus_time": round_bus_time(row.start_time - app.bustime_start), "start_bus_time": round_bus_time(row.start_time - app.bustime_start),
"end_time": row.end_time.isoformat(), "end_time": row.end_time.isoformat(),
"end_bus_time": round_bus_time(row.start_time - app.bustime_start), "end_bus_time": round_bus_time(row.start_time - app.bustime_start),
"verifier": row.verifier,
"text": row.transcription_line} for row in results]) "text": row.transcription_line} for row in results])
def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None): def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None):
query = "SELECT * FROM buscribe_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s " query = "SELECT * FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s "
if ts_query is not None: if ts_query is not None:
query += "AND to_tsvector(transcription_line) @@ websearch_to_tsquery(%(text_query)s) " \ query += "AND transcription_line_ts @@ (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery " \
"ORDER BY ts_rank_cd(to_tsvector(transcription_line), websearch_to_tsquery(%(text_query)s)) DESC, " \ "ORDER BY ts_rank_cd(transcription_line_ts, (websearch_to_tsquery(%(text_query)s)::text ||':*')::tsquery) DESC, " \
"start_time" "start_time"
else:
query += "ORDER BY start_time"
if limit is not None: if limit is not None:
query += "LIMIT %(limit)s" query += "LIMIT %(limit)s"
@ -111,8 +115,6 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query += ";" query += ";"
print(query)
return database.query(db_conn, query, return database.query(db_conn, query,
start_time=start_time if start_time is not None else '-infinity', start_time=start_time if start_time is not None else '-infinity',
end_time=end_time if end_time is not None else 'infinity', end_time=end_time if end_time is not None else 'infinity',

@ -47,7 +47,7 @@ def servelet(server):
'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE') 'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start', @argh.arg('--bustime-start',
help='The start time in UTC for the event, for UTC-Bustime conversion') help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8005, bustime_start=None): def main(database="", host='0.0.0.0', port=8010, bustime_start=None):
if bustime_start is None: if bustime_start is None:
logging.error("Missing --bustime-start!") logging.error("Missing --bustime-start!")
exit(1) exit(1)

@ -41,4 +41,8 @@
grid-column: text; grid-column: text;
grid-row: span 2; grid-row: span 2;
} }
}
.line.verified {
background: #555;
} }

@ -19,7 +19,7 @@ function query(text, start_time, end_time) {
query_string += `&query=${text}` query_string += `&query=${text}`
} }
fetch(`http://localhost:8005/buscribe/json?${query_string}`) fetch(`http://localhost:8010/buscribe/json?${query_string}`)
.then(response => response.json()) .then(response => response.json())
// .then(response => console.log(response.error())) // .then(response => console.log(response.error()))
.then(fillResults) .then(fillResults)
@ -42,6 +42,9 @@ function fillResults(results) {
const line_div = document.createElement("div"); const line_div = document.createElement("div");
line_div.classList.add("line"); line_div.classList.add("line");
if (line.verifier) {
line_div.classList.add("verified");
}
line_div.innerHTML = ` line_div.innerHTML = `
<div class="line_start_bus_time">${line.start_bus_time}</div> <div class="line_start_bus_time">${line.start_bus_time}</div>

@ -69,3 +69,30 @@ CREATE TABLE buscribe_verified_lines
verifier text REFERENCES buscribe_verifiers, verifier text REFERENCES buscribe_verifiers,
PRIMARY KEY (line, verifier) PRIMARY KEY (line, verifier)
); );
-- Full-text GIN index over the hand-verified lines.
-- The vectors are tagged with weight C, which ranks at 0.2 instead of the 0.1
-- given to the default weight, so verified text outranks unweighted text.
-- NOTE(review): the indexed expression is spelled exactly like the
-- transcription_line_ts expression for verified lines in
-- buscribe_all_transcriptions; presumably they must stay in sync for the
-- planner to use this index -- confirm before changing either one.
CREATE INDEX buscribe_verified_lines_idx ON buscribe_verified_lines USING
GIN (setweight(to_tsvector('english', verified_line), 'C'));
-- buscribe_all_transcriptions: one searchable row per machine transcription
-- plus one row per hand-verified version of a line.
--   * Machine rows come from buscribe_transcriptions with verifier = NULL and
--     an unweighted English tsvector.
--   * Verified rows come from buscribe_verified_lines, take their time range
--     from the transcription they refer to (line = "id"), and carry a
--     C-weighted tsvector.
--
-- The view is (re)created inside a transaction so readers never observe a
-- moment where it is missing. DROP ... IF EXISTS keeps the script runnable on
-- a fresh database, and the transaction is COMMITted: ending with ROLLBACK
-- would discard the CREATE VIEW and leave the search queries without a view.
BEGIN;

DROP VIEW IF EXISTS buscribe_all_transcriptions;

CREATE VIEW buscribe_all_transcriptions AS
SELECT "id",
       start_time,
       end_time,
       null AS verifier,
       transcription_line,
       to_tsvector('english', transcription_line) AS transcription_line_ts
FROM buscribe_transcriptions
UNION
SELECT "id",
       start_time,
       end_time,
       verifier,
       verified_line AS transcription_line,
       setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts
FROM buscribe_verified_lines
         INNER JOIN buscribe_transcriptions ON (line = "id")
ORDER BY "id";

COMMIT;
Loading…
Cancel
Save