Compare commits

...

43 Commits

Author SHA1 Message Date
Matija Rezar fbb6c4dca0 current stat dump 3 months ago
HeNine 0355c59ee8 search optimization 7 3 years ago
HeNine 7673c900ae search optimization 6: aaaaaaaa 3 years ago
HeNine 5260fb60c0 search optimization 5: the morning after 3 years ago
HeNine 881712ed29 search optimization 4 i give up 3 years ago
HeNine 8733526c01 search optimization 3 3 years ago
HeNine dd4dede5cd search optimization 2 3 years ago
HeNine 8a6f32975a search optimization 3 years ago
HeNine 8f6fc2b722 hotkeys 3 years ago
HeNine 0c43bcb714 get random line for tagging 3 years ago
HeNine ec2929c99f buscribe api segment path 3 years ago
HeNine 073d6c8769 improved display when speakers are set but line isn't 3 years ago
HeNine 7c60a407be blugh 3 years ago
HeNine ae1ca08dbe add line links 3 years ago
HeNine 9f4f15232f professor deployment 3 years ago
HeNine 46d228c42e preparing professor deployment 3 years ago
HeNine 770a97387a other authentication 3 years ago
HeNine fb2132dd16 front end authentication 3 years ago
HeNine 74155a7f6c idk what google is doing 3 years ago
HeNine 7a3328f5f3 show line context 3 years ago
HeNine e6325b31c8 Templates included. Closes #6 3 years ago
HeNine 2125f9d263 goating there 3 years ago
HeNine 87d6849670 Getting DB transcription up and running 3 years ago
HeNine 22cf34641b Cosmetics 3 years ago
HeNine c0b334695a time-related unbusening, cont. 3 years ago
HeNine 022c271e07 time-related unbusening 3 years ago
HeNine a1a0a47a72 git also sucks 3 years ago
HeNine 337e1fcc23 final touches closes #4 3 years ago
HeNine 22e270b6ff #4 Add bustime search options 3 years ago
HeNine 753784657e channel selection closes #5 3 years ago
HeNine 0a3fc28a01 prepare for channel selection 3 years ago
HeNine f217931fa3 bustime format 3 years ago
HeNine 481581b84f nginx fixest 3 years ago
HeNine 9f90fb822c nginx fixer 3 years ago
HeNine 0e9bf87d61 nginx fix 3 years ago
HeNine 61c69db72f more waiting 3 years ago
HeNine f7c05ff53d less 3 years ago
HeNine 090037f261 End of segments weirdness 3 years ago
HeNine 11954d1a31 Docker compose attempt 1 3 years ago
HeNine 2f4189bc31 One more database fix 3 years ago
HeNine 372c96f29d Update port to conform to wub 3 years ago
HeNine cc49374096 Change start time priority to make database take priority 3 years ago
HeNine 50c4d8a096 Final result sometimes has no content (if segments are missing after silence?) 3 years ago

@ -0,0 +1 @@
models/

@ -0,0 +1,15 @@
#!/bin/bash
# Build all buscribe Docker images and compile the LESS stylesheets to CSS.
#
# Every path below is relative, so run this from the directory that contains
# buscribe/, buscribe-api/, professor-api/, docker-less/ and nginx/.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline. Without this a failed build or a failed
# lessc run would be ignored and the later steps would still execute.
set -euo pipefail

# Tag applied to every image built below.
VERSION=0.0.0

#bash fetch_models.sh
docker build -f buscribe/Dockerfile -t "buscribe:$VERSION" .
docker build -f buscribe-api/Dockerfile -t "buscribe-api:$VERSION" .
docker build -f professor-api/Dockerfile -t "professor-api:$VERSION" .

# Helper image used only to run the LESS compiler.
docker build -f docker-less/Dockerfile -t lessc .

# Compile each style.less to a style.css next to it on the host.
docker run --rm -v "$(pwd)/buscribe-web:/buscribe-web" lessc /buscribe-web/style.less > buscribe-web/style.css
docker run --rm -v "$(pwd)/professor:/professor" lessc /professor/style.less > professor/style.css

# Built last, after the stylesheets exist.
# NOTE(review): presumably nginx/Dockerfile copies the generated CSS - confirm.
docker build -f nginx/Dockerfile -t "buscribe-web:$VERSION" .

@ -13,6 +13,7 @@ RUN pip install /tmp/common && rm -r /tmp/common
# Install actual application # Install actual application
RUN apk add postgresql-dev postgresql-libs RUN apk add postgresql-dev postgresql-libs
COPY buscribe-api /tmp/buscribe-api COPY buscribe-api /tmp/buscribe-api
RUN pip install /tmp/buscribe-api && rm -r /tmp/buscribe-api RUN pip install /tmp/buscribe-api && cp -r /tmp/buscribe-api/templates /templates \
&& rm -r /tmp/buscribe-api
ENTRYPOINT ["python3", "-m", "buscribeapi"] ENTRYPOINT ["python3", "-m", "buscribeapi", "--base-dir", "/mnt"]

@ -1,9 +1,8 @@
import json
from datetime import timedelta from datetime import timedelta
import flask as flask
import common import common
from common import dateutil, database import flask as flask
from common import dateutil, database, format_bustime, dt_to_bustime, bustime_to_dt, parse_bustime
from dateutil.parser import ParserError from dateutil.parser import ParserError
from flask import request, jsonify, Response, render_template from flask import request, jsonify, Response, render_template
@ -22,11 +21,6 @@ def create_seconds_timedelta(seconds):
return timedelta(seconds=seconds) return timedelta(seconds=seconds)
def round_bus_time(delta: timedelta):
"""Round bus time down to the second."""
return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}'
@app.route('/buscribe/vtt') @app.route('/buscribe/vtt')
def get_vtt(): def get_vtt():
"""Returns WebVTT subtitle file for the period between start_time and end_time. """Returns WebVTT subtitle file for the period between start_time and end_time.
@ -74,20 +68,32 @@ def get_json():
(https://www.postgresql.org/docs/13/functions-textsearch.html)""" (https://www.postgresql.org/docs/13/functions-textsearch.html)"""
start_time_string = request.args.get('start_time') start_time_string = request.args.get('start_time')
bus_start_time_string = request.args.get('bus_start_time')
if start_time_string is not None: if start_time_string is not None:
try: try:
start_time = dateutil.parse(start_time_string) start_time = dateutil.parse(start_time_string)
except ParserError: except ParserError:
return "Invalid start time!", 400 return "Invalid start time!", 400
elif bus_start_time_string is not None:
try:
start_time = bustime_to_dt(app.bustime_start, parse_bustime(bus_start_time_string))
except ValueError:
return "Invalid bus end time!", 400
else: else:
start_time = None start_time = None
end_time_string = request.args.get('end_time') end_time_string = request.args.get('end_time')
bus_end_time_string = request.args.get('bus_end_time')
if end_time_string is not None: if end_time_string is not None:
try: try:
end_time = dateutil.parse(end_time_string) end_time = dateutil.parse(end_time_string)
except ParserError: except ParserError:
return "Invalid end time!", 400 return "Invalid end time!", 400
elif bus_end_time_string is not None:
try:
end_time = bustime_to_dt(app.bustime_start, parse_bustime(bus_end_time_string))
except ValueError:
return "Invalid bus end time!", 400
else: else:
end_time = None end_time = None
@ -103,35 +109,120 @@ def get_json():
return jsonify([{"id": row.id, return jsonify([{"id": row.id,
"start_time": row.start_time.isoformat(), "start_time": row.start_time.isoformat(),
"start_bus_time": round_bus_time(row.start_time - app.bustime_start), "start_bus_time": format_bustime(dt_to_bustime(app.bustime_start, row.start_time), "second"),
"end_time": row.end_time.isoformat(), "end_time": row.end_time.isoformat(),
"end_bus_time": round_bus_time(row.start_time - app.bustime_start), "end_bus_time": format_bustime(dt_to_bustime(app.bustime_start, row.end_time), "second"),
"verifier": row.verifier, "verifier": row.verifier,
"speakers": row.names, "speakers": row.names,
"text": row.highlighted_text if row.highlighted_text is not None else ""} for row in results]) "text": row.highlighted_text if row.highlighted_text is not None else ""} for row in results])
def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None): def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None):
query = "SELECT *" + \ query = f"""
( WITH q AS (
",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \ SELECT convert_query(%(text_query)s)
" FROM buscribe_all_transcriptions WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s " ),
time_window AS (
if ts_query is not None: SELECT id
query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \ FROM buscribe_transcriptions
"convert_query(%(text_query)s) " \ WHERE start_time >= %(start_time)s
"ORDER BY ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector), convert_query(%(text_query)s)) DESC, " \ AND end_time <= %(end_time)s
"start_time " ),
else: relevant_lines AS (
query += "ORDER BY start_time " (
SELECT id
if limit is not None: FROM buscribe_transcriptions
query += "LIMIT %(limit)s " WHERE id IN (SELECT id FROM time_window)
{"AND to_tsvector('english', transcription_line) @@ (SELECT * FROM q)" if ts_query else ""}
if offset is not None: )
query += "OFFSET %(limit)s " UNION
(
query += ";" SELECT line
FROM buscribe_verified_lines
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector('english', verified_line) @@ (SELECT * FROM q)" if ts_query else ""}
)
UNION
(
SELECT line
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector(name) @@ (SELECT * FROM q)" if ts_query else ""}
)
UNION
(
SELECT line
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector(name) @@ (SELECT * FROM q)" if ts_query else ""}
)
)
(
(SELECT id,
start_time,
end_time,
null AS verifier,
names,
transcription_line,
ts_rank_cd(coalesce(to_tsvector('english', transcription_line), ''::tsvector) ||
coalesce(to_tsvector(array_to_string(names, ' ')), ''::tsvector), (SELECT * FROM q)) AS rank,
ts_headline(transcription_line,
(SELECT * FROM q), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text,
transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN (SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers
ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line) AS inferred_speakers ON id = inferred_speakers.line
WHERE id IN (SELECT id FROM relevant_lines)
)
UNION
(
SELECT buscribe_transcriptions.id AS id,
start_time,
end_time,
cverifier AS verifier,
names,
coalesce(verifications.verified_line,
buscribe_transcriptions.transcription_line) AS transcription_line,
ts_rank_cd(coalesce(
setweight(to_tsvector('english', verified_line), 'C'),
to_tsvector('english', buscribe_transcriptions.transcription_line),
''::tsvector) ||
coalesce(setweight(to_tsvector(array_to_string(names, ' ')), 'C'), ''::tsvector),
(SELECT * FROM q)) AS rank,
ts_headline(coalesce(verifications.verified_line, buscribe_transcriptions.transcription_line),
(SELECT * FROM q), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text,
null AS transcription_json
FROM buscribe_transcriptions
INNER JOIN (
SELECT *,
coalesce(relevant_verified.line, relevant_speakers.line) AS cline,
coalesce(relevant_verified.verifier, relevant_speakers.verifier) AS cverifier
FROM (SELECT *
FROM buscribe_verified_lines
WHERE line IN (SELECT id FROM relevant_lines)) AS relevant_verified
FULL OUTER JOIN
(SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers
ON buscribe_line_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM relevant_lines)
GROUP BY line, verifier) AS relevant_speakers
ON relevant_verified.line = relevant_speakers.line AND
relevant_speakers.verifier = relevant_verified.verifier) AS verifications
ON id = verifications.cline
)
)
ORDER BY
{"rank DESC," if ts_query is not None else ""}
start_time
{"OFFSET %(offset)s" if offset is not None else ""}
{"LIMIT %(limit)s" if limit is not None else ""};
"""
return database.query(db_conn, query, return database.query(db_conn, query,
start_time=start_time if start_time is not None else '-infinity', start_time=start_time if start_time is not None else '-infinity',

@ -37,7 +37,8 @@ def servelet(server):
logging.info('Starting WSGI server.') logging.info('Starting WSGI server.')
server.serve_forever() server.serve_forever()
@argh.arg('channel',
help="Twitch channel to transcribe.")
@argh.arg('--host', @argh.arg('--host',
help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).') help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).')
@argh.arg('--port', @argh.arg('--port',
@ -49,7 +50,7 @@ def servelet(server):
help='The start time in UTC for the event, for UTC-Bustime conversion') help='The start time in UTC for the event, for UTC-Bustime conversion')
@argh.arg('--base-dir', @argh.arg('--base-dir',
help='Directory from which segments will be grabbed. Default is current working directory.') help='Directory from which segments will be grabbed. Default is current working directory.')
def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None): def main(channel, database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None):
if bustime_start is None: if bustime_start is None:
logging.error("Missing --bustime-start!") logging.error("Missing --bustime-start!")
exit(1) exit(1)
@ -62,7 +63,7 @@ def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=No
logging.error("Invalid --bustime-start!") logging.error("Invalid --bustime-start!")
exit(1) exit(1)
app.segments_dir = base_dir app.segments_dir = os.path.join(base_dir, channel, "source")
app.db_manager = DBManager(dsn=database) app.db_manager = DBManager(dsn=database)

@ -6,6 +6,12 @@
margin-bottom: 1em; margin-bottom: 1em;
div {
margin: 0;
padding: 0;
display: flex;
}
label { label {
display: inline-block; display: inline-block;
font-family: @sans-serif; font-family: @sans-serif;
@ -14,7 +20,7 @@
padding: 0.2em; padding: 0.2em;
} }
#text_search_line{ #text_search_line {
display: flex; display: flex;
flex-direction: row; flex-direction: row;
@ -29,10 +35,18 @@
#time_search_line { #time_search_line {
display: flex; display: flex;
flex-direction: row; flex-direction: row;
flex-wrap: wrap;
input[type=datetime-local] { div {
align-items: center;
}
input[type=datetime-local], input[type=text] {
width: 13em; width: 13em;
}
input[type=text] {
text-align: right;
} }
#search_button { #search_button {

@ -9,15 +9,27 @@
<body onload="onSiteLoad()"> <body onload="onSiteLoad()">
<div id="search_tools"> <div id="search_tools">
<div id="text_search_line" class="form_line"> <div id="text_search_line" class="form_line">
<label for="search_text">Search</label> <input type="search" id="search_text" oninput="doSearch()" <label for="search_text">Search</label> <input type="search" id="search_text" oninput="doSearch()"
placeholder="Supports quotes, 'or' and -."> placeholder="Supports quotes, 'or' and -.">
</div>
<div id="time_search_line" class="form_line">
<div><label for="start_time">Start time</label> <input id="start_time" type="datetime-local" autocomplete="off"></div>
<div><label for="end_time">End time</label> <input id="end_time" type="datetime-local" autocomplete="off"></div>
<div>
<label for="channel_select">Channel</label><select id="channel_select">
<option value="desertbus" selected>desertbus</option>
<option value="loadingreadyrun">loadingreadyrun</option>
</select>
</div> </div>
<div id="time_search_line" class="form_line"> <div>
<label for="start_time">Start time</label> <input id="start_time" type="datetime-local"> <label>Time type</label>
<label for="end_time">End time</label> <input id="end_time" type="datetime-local"> <input type="radio" name="time_type" id="UTC_time_radio" oninput="switchToUTC()" checked autocomplete="off"><label for="UTC_time_radio">UTC Time</label>
<button id="search_button" onclick="doSearch()" type="button">Search</button> <input type="radio" name="time_type" id="bus_time_radio" oninput="switchToBus()" autocomplete="off"><label for="bus_time_radio">Bus Time</label>
</div> </div>
<button id="search_button" onclick="doSearch()" type="button">Search</button>
</div>
</div> </div>
<div id="results"> <div id="results">

@ -60,6 +60,20 @@
grid-column: text; grid-column: text;
} }
.line_links {
text-align: right;
grid-column: times;
a {
margin-left: 0.5em;
font-size: small;
font-family: @sans-serif;
color: lightgray;
text-align: right;
}
}
} }
.line.verified { .line.verified {

@ -11,11 +11,13 @@ function onSiteLoad(e) {
function query(text, start_time, end_time) { function query(text, start_time, end_time) {
let query_string = "" let query_string = ""
const time_type = document.getElementById("UTC_time_radio").checked ? "" : "bus_";
if (start_time !== "") { if (start_time !== "") {
query_string += `start_time=${start_time}`; query_string += `${time_type}start_time=${start_time}`;
} }
if (end_time !== "") { if (end_time !== "") {
query_string += `&end_time=${end_time}`; query_string += `&${time_type}end_time=${end_time}`;
} }
if (text !== "") { if (text !== "") {
query_string += `&query=${text}` query_string += `&query=${text}`
@ -23,9 +25,10 @@ function query(text, start_time, end_time) {
query_string += "&limit=30"; query_string += "&limit=30";
fetch(`http://localhost:8010/buscribe/json?${query_string}`) const channel = document.getElementById("channel_select").value;
fetch(`https://wubloader.raptorpond.com/buscribe/${channel}/json?${query_string}`)
.then(response => response.json()) .then(response => response.json())
// .then(response => console.log(response.error()))
.then(fillResults) .then(fillResults)
} }
@ -42,6 +45,8 @@ function fillResults(results) {
const results_element = document.getElementById("results") const results_element = document.getElementById("results")
results_element.innerHTML = "" results_element.innerHTML = ""
const channel = document.getElementById("channel_select").value;
for (const line of results) { for (const line of results) {
const line_div = document.createElement("div"); const line_div = document.createElement("div");
@ -56,9 +61,42 @@ function fillResults(results) {
<div class="line_speakers">${line.speakers == null ? "" : line.speakers.join(", ")}</div> <div class="line_speakers">${line.speakers == null ? "" : line.speakers.join(", ")}</div>
<div class="line_start_time">${line.start_time}</div> <div class="line_start_time">${line.start_time}</div>
<div class="line_text">${line.text}</div> <div class="line_text">${line.text}</div>
<div class="line_links">
<a href="/professor/professor.html?line=${line.id}">Edit</a>
<a href="javascript:showContext('${line.start_time}');">Show context</a>
</div>
`; `;
results_element.append(line_div) results_element.append(line_div)
} }
}
// Turn both time inputs into native date-time pickers (UTC time entry).
function switchToUTC() {
    for (const input_id of ["start_time", "end_time"]) {
        document.getElementById(input_id).type = "datetime-local";
    }
}
// Turn both time inputs into plain text fields (bus time entry).
function switchToBus() {
    for (const input_id of ["start_time", "end_time"]) {
        document.getElementById(input_id).type = "text";
    }
}
/**
 * Show the lines surrounding a search result.
 *
 * Fills the start/end inputs with a window of three minutes either side of
 * `time`, clears the text query and re-runs the search.
 *
 * @param {string} time Start time of the clicked line (`line.start_time`).
 *     A "Z" is appended before parsing, so the value is interpreted as UTC.
 *     NOTE(review): this presumably expects a timestamp without a UTC
 *     offset - confirm against what the API returns.
 */
function showContext(time) {
    const CONTEXT_MINUTES = 3;

    // `time` shifted by `minutes`, as an ISO string without the trailing "Z".
    const shifted = (minutes) => {
        const date = new Date(time + "Z");
        date.setMinutes(date.getMinutes() + minutes);
        return date.toISOString().slice(0, -1);
    };

    // The window is expressed as UTC timestamps, so the form has to be in UTC
    // mode. If the "Bus Time" radio stayed checked, query() would send these
    // values as bus_start_time/bus_end_time and the search would fail.
    document.getElementById("UTC_time_radio").checked = true;
    const start_input = document.getElementById("start_time");
    const end_input = document.getElementById("end_time");
    start_input.type = "datetime-local";
    end_input.type = "datetime-local";

    start_input.value = shifted(-CONTEXT_MINUTES);
    end_input.value = shifted(CONTEXT_MINUTES);
    document.getElementById("search_text").value = "";

    doSearch();
}

@ -1,9 +1,9 @@
FROM debian:latest FROM debian:11
RUN apt update &&\ RUN apt update &&\
apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg
COPY ../common /tmp/common COPY common /tmp/common
RUN pip install /tmp/common && rm -r /tmp/common RUN pip install /tmp/common && rm -r /tmp/common
COPY buscribe /tmp/buscribe COPY buscribe /tmp/buscribe

@ -84,7 +84,7 @@ def get_end_of_transcript(db_cursor):
"""Grab the end timestamp of the current transcript. """Grab the end timestamp of the current transcript.
If there is no existing transcript returns default; used for cold starts.""" If there is no existing transcript returns default; used for cold starts."""
db_cursor.execute("SELECT end_time FROM buscribe.public.buscribe_transcriptions ORDER BY end_time DESC LIMIT 1") db_cursor.execute("SELECT end_time FROM buscribe_transcriptions ORDER BY end_time DESC LIMIT 1")
end_of_transcript_row = db_cursor.fetchone() end_of_transcript_row = db_cursor.fetchone()
return end_of_transcript_row.end_time if end_of_transcript_row is not None else None return end_of_transcript_row.end_time if end_of_transcript_row is not None else None
@ -94,9 +94,10 @@ def finish_off_recognizer(recognizer: BuscribeRecognizer, db_cursor):
"""Flush the recognizer, commit the final line to the database and reset it.""" """Flush the recognizer, commit the final line to the database and reset it."""
final_result_json = json.loads(recognizer.final_result()) # Flush the tubes final_result_json = json.loads(recognizer.final_result()) # Flush the tubes
line_start_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][0]["start"]) if "result" in final_result_json:
line_end_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][-1]["end"]) line_start_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][0]["start"])
line_end_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][-1]["end"])
write_line(final_result_json, line_start_time, line_end_time, db_cursor) write_line(final_result_json, line_start_time, line_end_time, db_cursor)
recognizer.reset() recognizer.reset()

@ -1,6 +1,6 @@
import logging import logging
import os import os
from datetime import timedelta, datetime from datetime import timedelta, datetime, timezone
from time import sleep from time import sleep
import argh import argh
@ -27,13 +27,15 @@ from buscribe.recognizer import BuscribeRecognizer
help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, ' help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, '
'to prime the model. The transcripts for that time will not be written to the database. If not given ' 'to prime the model. The transcripts for that time will not be written to the database. If not given '
'transcription will start after last already transcribed line.') 'transcription will start after last already transcribed line.')
@argh.arg('--start-time-override',
help='Ignore database and force override the start time.')
@argh.arg('--end-time', @argh.arg('--end-time',
help='End of transcript. If not given continues to transcribe live.') help='End of transcript. If not given continues to transcribe live.')
@argh.arg('--base-dir', @argh.arg('--base-dir',
help='Directory from which segments will be grabbed. Default is current working directory.') help='Directory from which segments will be grabbed. Default is current working directory.')
def main(channel, database="", base_dir=".", def main(channel, database="", base_dir=".",
model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/", model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
start_time=None, end_time=None): start_time=None, end_time=None, start_time_override=None):
SAMPLE_RATE = 48000 SAMPLE_RATE = 48000
segments_dir = os.path.join(base_dir, channel, "source") segments_dir = os.path.join(base_dir, channel, "source")
@ -44,19 +46,27 @@ def main(channel, database="", base_dir=".",
logging.debug("Got database cursor.") logging.debug("Got database cursor.")
logging.info("Figuring out starting time...") logging.info("Figuring out starting time...")
if start_time is not None: db_start_time = get_end_of_transcript(db_cursor)
# ~~Database start time takes priority~~
# Override takes priority
if start_time_override is not None:
start_time = dateutil.parse(start_time_override)
elif db_start_time is not None:
start_time = db_start_time
elif start_time is not None:
start_time = dateutil.parse(start_time) start_time = dateutil.parse(start_time)
else: else:
start_time = get_end_of_transcript(db_cursor) # No start time argument AND no end of transcript (empty database)
logging.error("Couldn't figure out start time!")
db_conn.close()
exit(1)
logging.info("Start time: {}".format(start_time))
if end_time is not None: if end_time is not None:
end_time = dateutil.parse(end_time) end_time = dateutil.parse(end_time)
# No start time argument AND no end of transcript (empty database) logging.info("End time: {}".format(end_time))
if start_time is None:
logging.error("Couldn't figure out start time!")
db_conn.close()
exit(1)
logging.info("Loading models...") logging.info("Loading models...")
recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model) recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
@ -77,15 +87,34 @@ def main(channel, database="", base_dir=".",
gevent.signal_handler(signal.SIGTERM, stop) gevent.signal_handler(signal.SIGTERM, stop)
while True: while start_time < end_time:
# If end time isn't given, use current time (plus fudge) to get a "live" segment list # If end time isn't given, use current time (plus fudge) to get a "live" segment list
segments = common.get_best_segments(segments_dir, segments = common.get_best_segments(segments_dir,
start_time, start_time,
end_time if end_time is not None else datetime.now() + timedelta(minutes=2)) end_time if end_time is not None else
# Remove initial None segment if it exists datetime.utcnow() + timedelta(minutes=2))
# If there is a hole at the start of the requested range (first entry is None)
if segments[0] is None:
# The hole is older than a minute, therefore
# - reset recognizer
# - continue from existing segments
if datetime.utcnow() - start_time > timedelta(minutes=1):
finish_off_recognizer(recognizer, db_cursor)
# If the hole is less than a minute old, or if we don't have new segments: wait for segments
if datetime.utcnow() - start_time <= timedelta(minutes=1) or \
segments == [None]:
logging.info("Waiting for new or backfilled segments.")
sleep(30)
continue # Retry
# Remove initial None segment (indicating segments start time is after desired start time) if it exists
if segments[0] is None: if segments[0] is None:
segments = segments[1:] segments = segments[1:]
# Recognizer is fresh or was reset
if recognizer.segments_start_time is None: if recognizer.segments_start_time is None:
recognizer.segments_start_time = segments[0].start recognizer.segments_start_time = segments[0].start
logging.info(f"Starting from: {segments[0].start}") logging.info(f"Starting from: {segments[0].start}")
@ -99,14 +128,5 @@ def main(channel, database="", base_dir=".",
finish_off_recognizer(recognizer, db_cursor) finish_off_recognizer(recognizer, db_cursor)
db_conn.close() db_conn.close()
exit(0) exit(0)
elif datetime.now() - segments_end_time > timedelta(minutes=5):
# Last seen segment ended more than five minutes ago. We hit a gap that will likely stay unfilled.
# Reset and jump to the other end of the gap.
finish_off_recognizer(recognizer, db_cursor)
else:
# End of live segment or a gap that is not old and might get filled.
# Give it a bit of time and continue.
# Note: if the gap is not filled within 30s, we jump to the next available segment.
sleep(30)
start_time = segments_end_time start_time = segments_end_time

@ -7,8 +7,10 @@ setup(
install_requires = [ install_requires = [
"argh", "argh",
"psycopg2", "psycopg2",
"gevent==1.5a2", #"gevent==1.5a2",
"greenlet==0.4.16", "gevent",
#"greenlet==0.4.16",
"greenlet",
"psycogreen", "psycogreen",
"wubloader-common", "wubloader-common",
"python-dateutil", "python-dateutil",

@ -49,9 +49,9 @@ CREATE TABLE buscribe_verifiers
); );
-- For testing -- For testing
INSERT INTO buscribe_verifiers(email, name) -- INSERT INTO buscribe_verifiers(email, name)
VALUES ('placeholder@example.com', 'Place Holder'), -- VALUES ('placeholder@example.com', 'Place Holder'),
('aguy@example.com', 'Arnold Guyana'); -- ('aguy@example.com', 'Arnold Guyana');
CREATE TABLE buscribe_line_speakers CREATE TABLE buscribe_line_speakers
( (
@ -62,6 +62,13 @@ CREATE TABLE buscribe_line_speakers
PRIMARY KEY (line, speaker, verifier) PRIMARY KEY (line, speaker, verifier)
); );
-- Associates transcription lines with speakers, without a verifier column.
-- NOTE(review): presumably filled by automatic speaker inference rather than
-- by a human verifier - confirm against the code that writes to it.
CREATE TABLE buscribe_line_inferred_speakers
(
-- Transcription line the speaker is attached to.
line BIGINT NOT NULL REFERENCES buscribe_transcriptions,
-- Speaker attached to that line.
speaker BIGINT NOT NULL REFERENCES buscribe_speakers,
-- A speaker can be attached to a given line at most once.
PRIMARY KEY (line, speaker)
);
CREATE TABLE buscribe_verified_lines CREATE TABLE buscribe_verified_lines
( (
-- id BIGSERIAL PRIMARY KEY, -- id BIGSERIAL PRIMARY KEY,
@ -83,12 +90,13 @@ CREATE VIEW buscribe_all_transcriptions AS
SELECT buscribe_transcriptions.id, SELECT buscribe_transcriptions.id,
start_time, start_time,
end_time, end_time,
coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier, coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
names, names,
verified_line AS transcription_line, coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts, coalesce(setweight(to_tsvector('english', verified_line), 'C'),
setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts, to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts,
null AS transcription_json setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
null AS transcription_json
FROM buscribe_transcriptions FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN ( LEFT OUTER JOIN (
@ -106,15 +114,66 @@ SELECT id,
start_time, start_time,
end_time, end_time,
null AS verifier, null AS verifier,
null AS names, names,
transcription_line, transcription_line,
to_tsvector('english', transcription_line) AS transcription_line_ts, to_tsvector('english', transcription_line) AS transcription_line_ts,
null AS names_ts, null AS names_ts,
transcription_json transcription_json
FROM buscribe_transcriptions; FROM buscribe_transcriptions
LEFT OUTER JOIN (
SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line
) AS speakers ON id = speakers.line;
ROLLBACK; ROLLBACK;
-- Combined view over verified and machine transcriptions.
-- It is the union of two branches that expose the same column list:
--   1. rows that carry a verifier (verified text and/or verified speakers);
--   2. every machine transcription, with speaker names taken from
--      buscribe_line_inferred_speakers.
-- Besides the combined transcription_line_ts it exposes separate
-- machine_line_ts and verified_line_ts text-search vectors.
CREATE VIEW buscribe_all_transcriptions2 AS
-- Branch 1: verified rows.
SELECT buscribe_transcriptions.id,
start_time,
end_time,
-- The verifier comes from the verified line if there is one, else from the speakers.
coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
names,
-- Verified text wins over the machine transcription when both exist.
coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line,
to_tsvector('english', buscribe_transcriptions.transcription_line) AS machine_line_ts,
setweight(to_tsvector('english', verified_line), 'C') AS verified_line_ts,
-- Search vector of the verified text (weight C), falling back to the machine text.
coalesce(setweight(to_tsvector('english', verified_line), 'C'),
to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts,
setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
-- The raw transcription JSON is not exposed for verified rows.
null AS transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN (
-- Speaker names aggregated per (line, verifier).
SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
GROUP BY line, verifier
-- Speakers are matched to a verified line by the same verifier, or kept on
-- their own when the line has no verified text.
) AS speakers ON buscribe_transcriptions.id = speakers.line AND (
speakers.verifier = buscribe_verified_lines.verifier OR
buscribe_verified_lines.verifier IS NULL
)
-- Keep only rows that some verifier actually touched.
WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL
UNION
-- Branch 2: machine transcriptions.
SELECT id,
start_time,
end_time,
null AS verifier,
names,
transcription_line,
to_tsvector('english', transcription_line) AS machine_line_ts,
null AS verified_line_ts,
to_tsvector('english', transcription_line) AS transcription_line_ts,
null AS names_ts,
transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN (
-- Speaker names aggregated per line from the inferred-speakers table.
SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line
) AS speakers ON id = speakers.line;
-- Convert last lexeme in a query to prefix query. -- Convert last lexeme in a query to prefix query.
CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS
$$ $$
@ -123,4 +182,4 @@ DECLARE
BEGIN BEGIN
RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery; RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery;
END; END;
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;

@ -0,0 +1,130 @@
version: "3"
services:
# Web front end: runs the buscribe-web image and publishes its port 80 on host port 8020.
buscribenginx:
image: buscribe-web:0.0.0
ports:
- "8020:80"
volumes:
# Host segment directory mounted under the nginx document root.
- /srv/wubloader/segments:/usr/share/nginx/html/segments
networks:
- default
- wubloader_default
- traefik_network
labels:
# Traefik labels: "buscribe-router" serves wubloader.raptorpond.com with TLS
# (certificate resolver "leresolver"); "buscribe-redirect" listens on the
# "web" entrypoint and applies a permanent redirect-to-https middleware.
- "traefik.docker.network=traefik_network"
- "traefik.http.routers.buscribe-router.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.entrypoints=web"
- "traefik.http.routers.buscribe-router.tls=true"
- "traefik.http.routers.buscribe-router.tls.certresolver=leresolver"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.scheme=https"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.permanent=true"
- "traefik.http.routers.buscribe-redirect.middlewares=buscribe-redirectscheme@docker"
restart: "on-failure"
# buscribelrr:
# image: buscribe:0.0.0
# command: [ "loadingreadyrun",
# "--start-time=2022-11-11T12:00:00Z",
# "--end-time=2022-11-20T22:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
# buscribedb:
# image: buscribe:0.0.0
# command: [ "desertbus",
# "--start-time=2023-11-10T12:00:00Z",
# "--end-time=2023-11-15T00:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
# buscribe worker for the "desertbus" channel, given the time bounds
# 2023-11-19T00:00Z to 2023-11-19T06:00Z and the vosk en-us 0.22 model.
# NOTE(review): the database password is embedded in the connection URL in plain text.
buscribedb0:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-19T00:00:00Z",
"--end-time=2023-11-19T06:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
# Host segment directory, mounted at /mnt inside the container.
- /srv/wubloader/segments:/mnt
# buscribe worker for the "desertbus" channel, given the time bounds
# 2023-11-18T06:00Z to 2023-11-18T12:00Z and the vosk en-us 0.22 model.
# NOTE(review): the database password is embedded in the connection URL in plain text.
buscribedb1:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T06:00:00Z",
"--end-time=2023-11-18T12:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
# Host segment directory, mounted at /mnt inside the container.
- /srv/wubloader/segments:/mnt
# buscribe worker for the "desertbus" channel, given the time bounds
# 2023-11-18T12:00Z to 2023-11-18T18:00Z and the vosk en-us 0.22 model.
# NOTE(review): the database password is embedded in the connection URL in plain text.
buscribedb2:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T12:00:00Z",
"--end-time=2023-11-18T18:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
# Host segment directory, mounted at /mnt inside the container.
- /srv/wubloader/segments:/mnt
# buscribe worker for the "desertbus" channel, given the time bounds
# 2023-11-18T18:00Z to 2023-11-19T00:00Z and the vosk en-us 0.22 model.
# NOTE(review): the database password is embedded in the connection URL in plain text.
buscribedb3:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T18:00:00Z",
"--end-time=2023-11-19T00:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
# Host segment directory, mounted at /mnt inside the container.
- /srv/wubloader/segments:/mnt
# buscribeapilrr:
# image: buscribe-api:0.0.0
# command: [
# "loadingreadyrun",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--bustime-start=2023-11-11T22:00:00Z" ]
# buscribe API for the "desertbus" channel, backed by the buscribe_db database.
# --bustime-start is the UTC instant passed to the API as the start of the event.
# NOTE(review): the database password is embedded in the connection URL in plain text.
buscribeapidb:
image: buscribe-api:0.0.0
command: [
"desertbus",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
volumes:
# Host segment directory, mounted at /mnt inside the container.
- /srv/wubloader/segments:/mnt
# professor API, backed by the same buscribe_db database as the buscribe API.
# NOTE(review): the database password is embedded in the connection URL in plain text.
professorapidb:
image: professor-api:0.0.0
command: [
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
# PostgreSQL 13 server that the other services reach as host "postgres".
postgres:
image: postgres:13
ports:
# Container port 5432 is published on host port 7654.
- "7654:5432"
environment:
# NOTE(review): superuser credentials are committed here in plain text.
- POSTGRES_USER=vst
- POSTGRES_DB=postgres
- POSTGRES_PASSWORD=flnMSYPRf
volumes:
# Database files live on the host so they survive container re-creation.
- /srv/buscribe/postgres:/var/lib/postgresql/data
restart: "unless-stopped"
# Prometheus exporter for PostgreSQL, published on host port 9187.
# NOTE(review): DATA_SOURCE_NAME targets buscribe_lrr, while the active services in
# this file use buscribe_db -- confirm this is the intended database.
postgres-prometheus:
image: quay.io/prometheuscommunity/postgres-exporter
ports:
- "9187:9187"
environment:
- DATA_SOURCE_NAME=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr?sslmode=disable
# Both networks are declared external: they must already exist and are not
# created by this compose file.
networks:
wubloader_default:
external: true
traefik_network:
external: true

@ -0,0 +1,5 @@
# Small image that installs the `less` npm package globally and runs `lessc`
# as its entrypoint, so container arguments are passed straight to the compiler.
FROM node:17-alpine
RUN npm install less -g
ENTRYPOINT ["lessc"]

@ -0,0 +1,5 @@
# nginx image with the static buscribe and professor front ends copied into
# the document root, plus the project's own nginx.conf.
# NOTE(review): the base image uses the floating `latest` tag, so rebuilds are not reproducible.
FROM nginx:latest
COPY buscribe-web /usr/share/nginx/html/buscribe
COPY professor /usr/share/nginx/html/professor
COPY nginx/nginx.conf /etc/nginx/nginx.conf

@ -0,0 +1,57 @@
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
# HTTP server: serves the static buscribe/professor front ends, proxies their
# JSON APIs to the API containers, and forwards everything else to the
# upstream host "nginx".
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
gzip on;
# 9 is the highest gzip level nginx accepts.
gzip_comp_level 9;
# Emit relative Location headers on redirects instead of absolute URLs.
absolute_redirect off;
server {
listen 80;
server_name localhost;
#access_log /var/log/nginx/host.access.log main;
# Fallback for every path not matched by a longer prefix below.
# NOTE(review): "nginx" is presumably the wubloader nginx container -- confirm.
location / { proxy_pass http://nginx; }
# /buscribelrr and /buscribe both serve the same static directory.
location /buscribelrr {
alias /usr/share/nginx/html/buscribe;
}
location /buscribe {
alias /usr/share/nginx/html/buscribe;
}
location /professor {
alias /usr/share/nginx/html/professor;
}
# API prefixes are longer than the static ones above, so nginx prefers them.
#location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; }
location /buscribe/desertbus/json { proxy_pass http://buscribeapidb:8010/buscribe/json; }
location /professor/desertbus { proxy_pass http://professorapidb:8011/professor; }
}
}

@ -13,10 +13,10 @@ from professor_api.professor_api import app
def cors(app): def cors(app):
"""WSGI middleware that sets CORS headers""" """WSGI middleware that sets CORS headers"""
HEADERS = [ HEADERS = [
("Access-Control-Allow-Credentials", "false"), ("Access-Control-Allow-Credentials", "true"),
("Access-Control-Allow-Headers", "*"), ("Access-Control-Allow-Headers", "content-type"),
("Access-Control-Allow-Methods", "GET,HEAD,POST,PUT"), ("Access-Control-Allow-Methods", "GET,HEAD,POST,PUT"),
("Access-Control-Allow-Origin", "*"), ("Access-Control-Allow-Origin", "http://localhost:63342,https://wubloader.raptorpond.com"),
("Access-Control-Expose-Headers", "*"), ("Access-Control-Expose-Headers", "*"),
("Access-Control-Max-Age", "86400"), ("Access-Control-Max-Age", "86400"),
] ]
@ -45,7 +45,7 @@ def servelet(server):
'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE') 'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start', @argh.arg('--bustime-start',
help='The start time in UTC for the event, for UTC-Bustime conversion') help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8005, bustime_start=None): def main(database="", host='0.0.0.0', port=8011, bustime_start=None):
if bustime_start is None: if bustime_start is None:
logging.error("Missing --bustime-start!") logging.error("Missing --bustime-start!")
exit(1) exit(1)

@ -1,5 +1,7 @@
import re import re
import urllib.parse import urllib.parse
from functools import wraps
from random import randrange
import flask import flask
import gevent import gevent
@ -8,9 +10,51 @@ from flask import jsonify, request, copy_current_request_context
from gevent import sleep from gevent import sleep
from psycopg2.extras import execute_values from psycopg2.extras import execute_values
from google.oauth2 import id_token
from google.auth.transport import requests
app = flask.Flask('buscribe') app = flask.Flask('buscribe')
def authenticate(f):
    """Require a valid Google ID token belonging to a registered verifier.

    The wrapped view reads the ID token from the ``credentials`` cookie,
    verifies it with Google, and checks that the token's email appears in
    ``buscribe_verifiers``. On success the view is called with the
    lower-cased email passed as the ``editor`` keyword argument.

    Responses produced by the wrapper itself:
        401 -- no ``credentials`` cookie was sent.
        403 -- the token failed verification, has no ``email`` claim, or the
               email is not a known verifier.

    Reference: https://developers.google.com/identity/sign-in/web/backend-auth
    https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#using-a-google-api-client-library
    """

    @wraps(f)
    def auth_wrapper(*args, **kwargs):
        # cookies.get() returns None for a missing cookie rather than raising,
        # so the absence has to be tested explicitly. The token itself is a
        # bearer credential and is deliberately never printed or logged.
        user_token = request.cookies.get("credentials")
        if not user_token:
            return 'User token required', 401

        try:
            idinfo = id_token.verify_oauth2_token(
                user_token, requests.Request(),
                "164084252563-kaks3no7muqb82suvbubg7r0o87aip7n.apps.googleusercontent.com")
            if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']:
                raise ValueError('Wrong issuer.')
            email = idinfo['email'].lower()
        except (ValueError, KeyError):
            # KeyError covers a verified token that lacks the expected claims.
            return 'Invalid token. Access denied.', 403

        # check whether user is in the database
        conn = app.db_manager.get_conn()
        results = database.query(conn, """
            SELECT email
            FROM buscribe_verifiers
            WHERE lower(email) = %s""", email)
        if results.fetchone() is None:
            return 'Unknown user. Access denied.', 403

        return f(*args, editor=email, **kwargs)

    return auth_wrapper
@app.route('/professor/line/<int:line_id>', methods=["GET"]) @app.route('/professor/line/<int:line_id>', methods=["GET"])
def get_line(line_id): def get_line(line_id):
db_conn = app.db_manager.get_conn() db_conn = app.db_manager.get_conn()
@ -20,7 +64,27 @@ def get_line(line_id):
if line is None: if line is None:
return "Line not found.", 404 return "Line not found.", 404
else: else:
return {"start_time": line.start_time.isoformat(), return {"id": line.id,
"start_time": line.start_time.isoformat(),
"end_time": line.end_time.isoformat(),
"line_data": line.transcription_json}
@app.route('/professor/line/random', methods=["GET"])
def get_random_line():
    """Return one transcription line picked at a random row offset.

    The response is a JSON object with the line's ``id``, ISO-formatted
    ``start_time`` and ``end_time``, and the raw ``line_data``. Responds with
    404 when the table is empty or the chosen row is no longer there.
    """
    db_conn = app.db_manager.get_conn()

    n_lines = database.query(db_conn, "SELECT count(*) AS n_lines FROM buscribe_transcriptions;").fetchone().n_lines

    # randrange(0) raises ValueError, so an empty table must be answered here.
    if not n_lines:
        return "Line not found.", 404

    row = randrange(n_lines)

    line = database.query(db_conn, "SELECT * FROM buscribe_transcriptions OFFSET %(row)s LIMIT 1;", row=row).fetchone()

    # The table can shrink between the count and the fetch.
    if line is None:
        return "Line not found.", 404

    return {"id": line.id,
            "start_time": line.start_time.isoformat(),
            "end_time": line.end_time.isoformat(),
            "line_data": line.transcription_json}
@ -42,12 +106,13 @@ def get_playlist(line_id):
#EXT-X-TARGETDURATION:{duration.total_seconds()} #EXT-X-TARGETDURATION:{duration.total_seconds()}
#EXT-X-PROGRAM-DATE-TIME:{start_time_iso} #EXT-X-PROGRAM-DATE-TIME:{start_time_iso}
#EXTINF:{duration.total_seconds()} #EXTINF:{duration.total_seconds()}
//localhost/cut/desertbus/source.ts?start={urllib.parse.quote_plus(start_time_iso)}&end={urllib.parse.quote_plus(end_time_iso)}&type=rough&allow_holes=true /cut/desertbus/source.ts?start={urllib.parse.quote_plus(start_time_iso)}&end={urllib.parse.quote_plus(end_time_iso)}&type=rough&allow_holes=true
#EXT-X-ENDLIST""" #EXT-X-ENDLIST"""
@app.route('/professor/line/<int:line_id>', methods=["POST"]) @app.route('/professor/line/<int:line_id>', methods=["POST"])
def update_line(line_id): @authenticate
def update_line(line_id, editor):
db_conn = app.db_manager.get_conn() db_conn = app.db_manager.get_conn()
if "speakers" in request.json and \ if "speakers" in request.json and \
@ -56,11 +121,11 @@ def update_line(line_id):
# Simpler than dealing with uniqueness # Simpler than dealing with uniqueness
database.query(db_conn, database.query(db_conn,
"DELETE FROM buscribe_line_speakers WHERE line = %(line_id)s AND verifier = %(verifier)s;", "DELETE FROM buscribe_line_speakers WHERE line = %(line_id)s AND verifier = %(verifier)s;",
line_id=line_id, verifier="placeholder@example.com") line_id=line_id, verifier=editor)
execute_values(db_conn.cursor(), execute_values(db_conn.cursor(),
"INSERT INTO buscribe_line_speakers(line, speaker, verifier) " "INSERT INTO buscribe_line_speakers(line, speaker, verifier) "
"VALUES %s;", "VALUES %s;",
[(line_id, speaker, "placeholder@example.com") for speaker in [(line_id, speaker, editor) for speaker in
request.json["speakers"]]) request.json["speakers"]])
if "transcription" in request.json and \ if "transcription" in request.json and \
isinstance(request.json["transcription"], str) and \ isinstance(request.json["transcription"], str) and \
@ -70,11 +135,11 @@ def update_line(line_id):
database.query(db_conn, database.query(db_conn,
"DELETE FROM buscribe_verified_lines WHERE line = %(line_id)s AND verifier = %(verifier)s;", "DELETE FROM buscribe_verified_lines WHERE line = %(line_id)s AND verifier = %(verifier)s;",
line_id=line_id, verifier="placeholder@example.com") line_id=line_id, verifier=editor)
database.query(db_conn, database.query(db_conn,
"INSERT INTO buscribe_verified_lines(line, verified_line, verifier) " "INSERT INTO buscribe_verified_lines(line, verified_line, verifier) "
"VALUES (%(line)s, %(verified_line)s, %(verifier)s)", "VALUES (%(line)s, %(verified_line)s, %(verifier)s)",
line=line_id, verified_line=verified_line, verifier="placeholder@example.com") line=line_id, verified_line=verified_line, verifier=editor)
return "", 204 return "", 204
@ -101,7 +166,8 @@ def get_speaker(speaker_id):
@app.route('/professor/speaker', methods=["PUT"]) @app.route('/professor/speaker', methods=["PUT"])
def new_speaker(): @authenticate
def new_speaker(editor=None):
name = request.json name = request.json
if not isinstance(name, str): if not isinstance(name, str):

@ -11,6 +11,7 @@ setup(
"psycogreen", "psycogreen",
"wubloader-common", "wubloader-common",
"python-dateutil", "python-dateutil",
"flask" "flask",
"google-auth"
], ],
) )

File diff suppressed because one or more lines are too long

@ -5,7 +5,6 @@
<title>Buscribe -- Professor</title> <title>Buscribe -- Professor</title>
<link href="video.js/dist/video-js.min.css" rel="stylesheet"> <link href="video.js/dist/video-js.min.css" rel="stylesheet">
<!-- <link href="videojs-hls-quality-selector/dist/videojs-hls-quality-selector.css" rel="stylesheet">-->
<link href="jquery-ui-1.13.0.custom/jquery-ui.css" rel="stylesheet"> <link href="jquery-ui-1.13.0.custom/jquery-ui.css" rel="stylesheet">
<link href="style.css" rel="stylesheet"> <link href="style.css" rel="stylesheet">
@ -13,11 +12,9 @@
<script src="jquery-ui-1.13.0.custom/external/jquery/jquery.js"></script> <script src="jquery-ui-1.13.0.custom/external/jquery/jquery.js"></script>
<script src="jquery-ui-1.13.0.custom/jquery-ui.js"></script> <script src="jquery-ui-1.13.0.custom/jquery-ui.js"></script>
<script src="script.js"></script> <script src="hotkeys.min.js"></script>
<!-- <script src="videojs-contrib-quality-levels/dist/videojs-contrib-quality-levels.min.js"></script>--> <script src="script.js"></script>
<!-- <script src="videojs-hls-quality-selector/dist/videojs-hls-quality-selector.min.js"></script>-->
</head> </head>
<body onload="pageReady()"> <body onload="pageReady()">
@ -46,7 +43,13 @@
<button id="submit_button" onclick="submit()" type="button">Submit</button><span id="update_indicator"></span> <button id="submit_button" onclick="submit()" type="button">Submit</button><span id="update_indicator"></span>
<script src="video.js/dist/video.min.js"></script> <div id="googleLoginButton" style="display: none"></div>
<div id="logout" style="display: none"><a href="javascript:doLogout()">Log out</a></div>
<script src="video.js/dist/video.min.js"></script>
<script src="https://accounts.google.com/gsi/client" async defer></script>
<script>
window.onGoogleLibraryLoad = doGoogle
</script>
</body> </body>
</html> </html>

@ -1,7 +1,13 @@
function pageReady() { function pageReady() {
const params = new URLSearchParams(document.location.search.substring(1)); const params = new URLSearchParams(document.location.search.substring(1));
line_id = parseInt(params.get("line"), 10); let line_id;
if (params.get("line") !== "random") {
line_id = parseInt(params.get("line"), 10);
} else {
line_id = "random"
}
videojs("player", { videojs("player", {
// src: "test.m3u8", // src: "test.m3u8",
@ -27,15 +33,61 @@ function pageReady() {
const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select'); const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select');
bgOpacitySelector.value = "0.5" bgOpacitySelector.value = "0.5"
fetch(`//localhost:8005/professor/line/${line_id}`) fetch(`/professor/desertbus/line/${line_id}`)
.then(response => response.json()) .then(response => response.json())
.then(fillLineInfo) .then(fillLineInfo)
.then(initializePlayer); .then(initializePlayer);
handleLoginState();
}
// Ctrl+Enter triggers the same action as clicking the Submit button.
hotkeys('ctrl+enter', () => {
    const submitButton = document.getElementById("submit_button");
    submitButton.click();
});
/**
 * Reveal the control that matches the current login state: the "Log out"
 * link when a `credentials` cookie exists, otherwise the Google sign-in button.
 */
function handleLoginState() {
    const hasCredentials = document.cookie
        .split('; ')
        .some(entry => entry.startsWith('credentials='));
    const visibleId = hasCredentials ? "logout" : "googleLoginButton";
    document.getElementById(visibleId).style.display = "";
}
/**
 * Set up Google Identity Services: register the sign-in callback, render the
 * sign-in button into its placeholder, and show the One Tap prompt.
 */
function doGoogle() {
    const identity = google.accounts.id;
    const settings = {
        client_id: "164084252563-kaks3no7muqb82suvbubg7r0o87aip7n.apps.googleusercontent.com",
        callback: loggedIn,
        auto_select: true
    };
    identity.initialize(settings);

    // customization attributes for the rendered button
    const buttonStyle = {theme: "outline", size: "large"};
    identity.renderButton(document.getElementById("googleLoginButton"), buttonStyle);

    // also display the One Tap dialog
    identity.prompt();
}
/**
 * Log out: expire the `credentials` cookie, hide the "Log out" link and show
 * the Google sign-in button again.
 */
function doLogout() {
    // An expiry date in the past makes the browser drop the cookie.
    const expiredCookie = "credentials=;expires=Thu, 01 Jan 1970 00:00:01 GMT";
    document.cookie = expiredCookie;

    const loginButton = document.getElementById("googleLoginButton");
    const logoutLink = document.getElementById("logout");
    loginButton.style.display = "";
    logoutLink.style.display = "none";
}
/**
 * Google sign-in callback: store the returned ID token in the `credentials`
 * cookie and swap the sign-in button for the "Log out" link.
 *
 * @param {{credential: string}} response  Credential response from Google
 *        Identity Services; `credential` is the ID token.
 */
function loggedIn(response) {
    // The token is a bearer credential, so it is intentionally not written
    // to the console.
    document.cookie = `credentials=${response.credential}`;
    document.getElementById("googleLoginButton").style.display = "none";
    document.getElementById("logout").style.display = "";
}
function fillLineInfo(line_json) { function fillLineInfo(line_json) {
// document.getElementById("original_transcription").innerText = line_json.line_data.text; line_id = line_json.id
line = line_json line = line_json
document.getElementById("original_transcription").innerHTML = line_json.line_data.result document.getElementById("original_transcription").innerHTML = line_json.line_data.result
.map(word => `<span style="opacity: ${word.conf}">${word.word}</span>`).join(" "); .map(word => `<span style="opacity: ${word.conf}">${word.word}</span>`).join(" ");
@ -45,11 +97,12 @@ function fillLineInfo(line_json) {
function initializePlayer() { function initializePlayer() {
videojs.getPlayer("player").src([ videojs.getPlayer("player").src([
{src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`} //{src: `/professor/desertbus/line/${line_id}/playlist.m3u8`}
{src: `/playlist/desertbus/source.m3u8?start=${line.start_time}&end=${line.end_time}`}
]); ]);
videojs.getPlayer("player").addRemoteTextTrack({ videojs.getPlayer("player").addRemoteTextTrack({
kind: "captions", kind: "captions",
src: `//localhost:8010/buscribe/vtt?start_time=${line.start_time}&end_time=${line.end_time}`, src: `/buscribe/desertbus/vtt?start_time=${line.start_time}&end_time=${line.end_time}`,
srclang: "en", srclang: "en",
label: "English", label: "English",
default: true default: true
@ -73,26 +126,28 @@ async function submit() {
} }
} }
return await fetch("//localhost:8005/professor/speaker", return await fetch("/professor/desertbus/speaker",
{ {
method: "PUT", method: "PUT",
headers: { headers: {
'Content-Type': 'application/json' 'Content-Type': 'application/json'
}, },
body: JSON.stringify(speaker) body: JSON.stringify(speaker),
credentials: "include"
}).then(response => }).then(response =>
parseInt(response.headers.get("Content-Location") parseInt(response.headers.get("Content-Location")
.split("/") .split("/")
.pop(), 10)); .pop(), 10));
})); }));
fetch(`//localhost:8005/professor/line/${line_id}`, fetch(`/professor/desertbus/line/${line_id}`,
{ {
method: "POST", method: "POST",
headers: { headers: {
'Content-Type': 'application/json' 'Content-Type': 'application/json'
}, },
body: JSON.stringify({transcription: new_transcription, speakers: new_speakers}) body: JSON.stringify({transcription: new_transcription, speakers: new_speakers}),
credentials: "include"
}).then(response => { }).then(response => {
if (response.ok) { if (response.ok) {
document.getElementById("update_indicator").innerText = "\u2714\ufe0f" document.getElementById("update_indicator").innerText = "\u2714\ufe0f"
@ -103,7 +158,7 @@ async function submit() {
} }
$(function () { $(function () {
fetch("//localhost:8005/professor/speaker") fetch("/professor/desertbus/speaker")
.then(response => response.json()) .then(response => response.json())
.then(function (speakers_json) { .then(function (speakers_json) {
speakers = speakers_json; speakers = speakers_json;
@ -152,4 +207,17 @@ $(function () {
} }
) )
}); });
/**
 * Decode the payload (second dot-separated segment) of a JWT and return it as
 * an object. The signature is NOT verified; this only reads the claims.
 *
 * @param {string} token  JWT in `header.payload.signature` form.
 * @returns {*} The parsed JSON payload.
 */
function parseJwt(token) {
    const payloadSegment = token.split('.')[1];

    // base64url -> standard base64 alphabet.
    const standardBase64 = payloadSegment.replace(/[-_]/g, ch => (ch === '-' ? '+' : '/'));

    // atob yields one character per byte; percent-encode each byte so that
    // decodeURIComponent can reassemble multi-byte UTF-8 sequences.
    const percentEncoded = Array.from(
        atob(standardBase64),
        ch => '%' + ch.charCodeAt(0).toString(16).padStart(2, '0')
    ).join('');

    return JSON.parse(decodeURIComponent(percentEncoded));
}

@ -61,3 +61,10 @@ button {
span.verified_cc { span.verified_cc {
color: #c1ffc1; color: #c1ffc1;
} }
#logout {
padding: 0.1em;
a {
color: darkgray
}
}

@ -0,0 +1,12 @@
#!/bin/bash
# One-off run of the buscribe image for the "loadingreadyrun" channel with the
# time bounds 2021-11-05T00:00 and 2021-11-07T00:00. The host segment
# directory is mounted at /mnt and the container is removed on exit (--rm).
# NOTE(review): the database password is embedded in the connection URL in plain text.
docker run \
--rm \
-v /srv/wubloader/segments/:/mnt/ \
buscribe:0.0.0 \
loadingreadyrun \
--start-time='2021-11-05T00:00' \
--end-time='2021-11-07T00:00' \
--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr \
--model=/usr/share/buscribe/vosk-model-en-us-0.22/
# --model=/usr/share/buscribe/vosk-model-small-en-us-0.15/
Loading…
Cancel
Save