Compare commits


No commits in common. 'trunk' and 'multichannel' have entirely different histories.

@@ -1 +0,0 @@
models/

@@ -1,15 +0,0 @@
#!/bin/bash
VERSION=0.0.0
#bash fetch_models.sh
docker build -f buscribe/Dockerfile -t buscribe:$VERSION .
docker build -f buscribe-api/Dockerfile -t buscribe-api:$VERSION .
docker build -f professor-api/Dockerfile -t professor-api:$VERSION .
docker build -f docker-less/Dockerfile -t lessc .
docker run --rm -v "$(pwd)"/buscribe-web:/buscribe-web lessc /buscribe-web/style.less > buscribe-web/style.css
docker run --rm -v "$(pwd)"/professor:/professor lessc /professor/style.less > professor/style.css
docker build -f nginx/Dockerfile -t buscribe-web:$VERSION .

@@ -13,7 +13,6 @@ RUN pip install /tmp/common && rm -r /tmp/common
 # Install actual application
 RUN apk add postgresql-dev postgresql-libs
 COPY buscribe-api /tmp/buscribe-api
-RUN pip install /tmp/buscribe-api && cp -r /tmp/buscribe-api/templates /templates \
-    && rm -r /tmp/buscribe-api
+RUN pip install /tmp/buscribe-api && rm -r /tmp/buscribe-api
 
-ENTRYPOINT ["python3", "-m", "buscribeapi", "--base-dir", "/mnt"]
+ENTRYPOINT ["python3", "-m", "buscribeapi"]

@@ -1,8 +1,9 @@
+import json
 from datetime import timedelta
-import common
 import flask as flask
-from common import dateutil, database, format_bustime, dt_to_bustime, bustime_to_dt, parse_bustime
+import common
+from common import dateutil, database
 from dateutil.parser import ParserError
 from flask import request, jsonify, Response, render_template
@@ -21,6 +22,11 @@ def create_seconds_timedelta(seconds):
     return timedelta(seconds=seconds)
 
+
+def round_bus_time(delta: timedelta):
+    """Round bus time down to the second."""
+    return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}'
+
 @app.route('/buscribe/vtt')
 def get_vtt():
     """Returns WebVTT subtitle file for the period between start_time and end_time.
@@ -68,32 +74,20 @@ def get_json():
     (https://www.postgresql.org/docs/13/functions-textsearch.html)"""
 
     start_time_string = request.args.get('start_time')
-    bus_start_time_string = request.args.get('bus_start_time')
     if start_time_string is not None:
         try:
             start_time = dateutil.parse(start_time_string)
         except ParserError:
             return "Invalid start time!", 400
-    elif bus_start_time_string is not None:
-        try:
-            start_time = bustime_to_dt(app.bustime_start, parse_bustime(bus_start_time_string))
-        except ValueError:
-            return "Invalid bus end time!", 400
     else:
         start_time = None
 
     end_time_string = request.args.get('end_time')
-    bus_end_time_string = request.args.get('bus_end_time')
     if end_time_string is not None:
         try:
             end_time = dateutil.parse(end_time_string)
         except ParserError:
             return "Invalid end time!", 400
-    elif bus_end_time_string is not None:
-        try:
-            end_time = bustime_to_dt(app.bustime_start, parse_bustime(bus_end_time_string))
-        except ValueError:
-            return "Invalid bus end time!", 400
     else:
         end_time = None
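On the removed side, `bus_start_time`/`bus_end_time` are accepted as an alternative to UTC timestamps and converted with `parse_bustime` and `bustime_to_dt` from `common`. A hedged sketch of that conversion, assuming `parse_bustime` yields an offset from `bustime_start` that `bustime_to_dt` simply adds (`bus_to_utc` is a hypothetical stand-in, not part of the codebase):

```python
from datetime import datetime, timedelta, timezone

def bus_to_utc(bustime_start: datetime, bus_time: str) -> datetime:
    """Convert a '[-]HH:MM[:SS]' bus-time string into an absolute UTC timestamp."""
    sign = -1 if bus_time.startswith("-") else 1
    parts = [int(p) for p in bus_time.lstrip("-").split(":")]
    hours, minutes, seconds = (parts + [0, 0])[:3]
    return bustime_start + sign * timedelta(hours=hours, minutes=minutes, seconds=seconds)

start = bus_to_utc(datetime(2023, 11, 11, 22, tzinfo=timezone.utc), "26:03:04")
print(start.isoformat())  # 2023-11-13T00:03:04+00:00
```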
@@ -109,120 +103,35 @@ def get_json():
     return jsonify([{"id": row.id,
                      "start_time": row.start_time.isoformat(),
-                     "start_bus_time": format_bustime(dt_to_bustime(app.bustime_start, row.start_time), "second"),
+                     "start_bus_time": round_bus_time(row.start_time - app.bustime_start),
                      "end_time": row.end_time.isoformat(),
-                     "end_bus_time": format_bustime(dt_to_bustime(app.bustime_start, row.end_time), "second"),
+                     "end_bus_time": round_bus_time(row.start_time - app.bustime_start),
                      "verifier": row.verifier,
                      "speakers": row.names,
                      "text": row.highlighted_text if row.highlighted_text is not None else ""} for row in results])
 
 
 def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset=None):
+    query = "SELECT *" + \
+            (
+                ",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \
+            " FROM buscribe_all_transcriptions WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s "
+
+    if ts_query is not None:
+        query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \
+                 "convert_query(%(text_query)s) " \
+                 "ORDER BY ts_rank_cd(coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector), convert_query(%(text_query)s)) DESC, " \
+                 "start_time "
+    else:
+        query += "ORDER BY start_time "
+
+    if limit is not None:
+        query += "LIMIT %(limit)s "
+
+    if offset is not None:
+        query += "OFFSET %(limit)s "
+
+    query += ";"

query = f"""
WITH q AS (
SELECT convert_query(%(text_query)s)
),
time_window AS (
SELECT id
FROM buscribe_transcriptions
WHERE start_time >= %(start_time)s
AND end_time <= %(end_time)s
),
relevant_lines AS (
(
SELECT id
FROM buscribe_transcriptions
WHERE id IN (SELECT id FROM time_window)
{"AND to_tsvector('english', transcription_line) @@ (SELECT * FROM q)" if ts_query else ""}
)
UNION
(
SELECT line
FROM buscribe_verified_lines
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector('english', verified_line) @@ (SELECT * FROM q)" if ts_query else ""}
)
UNION
(
SELECT line
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector(name) @@ (SELECT * FROM q)" if ts_query else ""}
)
UNION
(
SELECT line
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM time_window)
{"AND to_tsvector(name) @@ (SELECT * FROM q)" if ts_query else ""}
)
)
(
(SELECT id,
start_time,
end_time,
null AS verifier,
names,
transcription_line,
ts_rank_cd(coalesce(to_tsvector('english', transcription_line), ''::tsvector) ||
coalesce(to_tsvector(array_to_string(names, ' ')), ''::tsvector), (SELECT * FROM q)) AS rank,
ts_headline(transcription_line,
(SELECT * FROM q), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text,
transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN (SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers
ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line) AS inferred_speakers ON id = inferred_speakers.line
WHERE id IN (SELECT id FROM relevant_lines)
)
UNION
(
SELECT buscribe_transcriptions.id AS id,
start_time,
end_time,
cverifier AS verifier,
names,
coalesce(verifications.verified_line,
buscribe_transcriptions.transcription_line) AS transcription_line,
ts_rank_cd(coalesce(
setweight(to_tsvector('english', verified_line), 'C'),
to_tsvector('english', buscribe_transcriptions.transcription_line),
''::tsvector) ||
coalesce(setweight(to_tsvector(array_to_string(names, ' ')), 'C'), ''::tsvector),
(SELECT * FROM q)) AS rank,
ts_headline(coalesce(verifications.verified_line, buscribe_transcriptions.transcription_line),
(SELECT * FROM q), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text,
null AS transcription_json
FROM buscribe_transcriptions
INNER JOIN (
SELECT *,
coalesce(relevant_verified.line, relevant_speakers.line) AS cline,
coalesce(relevant_verified.verifier, relevant_speakers.verifier) AS cverifier
FROM (SELECT *
FROM buscribe_verified_lines
WHERE line IN (SELECT id FROM relevant_lines)) AS relevant_verified
FULL OUTER JOIN
(SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers
ON buscribe_line_speakers.speaker = buscribe_speakers.id
WHERE line IN (SELECT id FROM relevant_lines)
GROUP BY line, verifier) AS relevant_speakers
ON relevant_verified.line = relevant_speakers.line AND
relevant_speakers.verifier = relevant_verified.verifier) AS verifications
ON id = verifications.cline
)
)
ORDER BY
{"rank DESC," if ts_query is not None else ""}
start_time
{"OFFSET %(offset)s" if offset is not None else ""}
{"LIMIT %(limit)s" if limit is not None else ""};
"""
     return database.query(db_conn, query,
                           start_time=start_time if start_time is not None else '-infinity',
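On the new side, `fetch_lines` assembles the SQL from constant fragments and leaves every user-supplied value to psycopg2's named-parameter binding (`%(start_time)s`, `%(text_query)s`, ...). A condensed, self-contained sketch of that pattern (`build_query` is a hypothetical helper; note the diff passes `%(limit)s` for the OFFSET clause, which looks like it was meant to be `%(offset)s`):

```python
def build_query(ts_query=None, limit=None, offset=None):
    # Only fixed SQL fragments are concatenated; values stay as named parameters.
    query = ("SELECT * FROM buscribe_all_transcriptions "
             "WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s ")
    if ts_query is not None:
        query += ("AND transcription_line_ts @@ convert_query(%(text_query)s) "
                  "ORDER BY ts_rank_cd(transcription_line_ts, convert_query(%(text_query)s)) DESC, start_time ")
    else:
        query += "ORDER BY start_time "
    if limit is not None:
        query += "LIMIT %(limit)s "
    if offset is not None:
        query += "OFFSET %(offset)s "
    return query + ";"

print(build_query(ts_query="desert bus", limit=30))
```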

@@ -37,8 +37,7 @@ def servelet(server):
     logging.info('Starting WSGI server.')
     server.serve_forever()
 
-@argh.arg('channel',
-          help="Twitch channel to transcribe.")
 @argh.arg('--host',
           help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).')
 @argh.arg('--port',
@@ -50,7 +49,7 @@ def servelet(server):
           help='The start time in UTC for the event, for UTC-Bustime conversion')
 @argh.arg('--base-dir',
           help='Directory from which segments will be grabbed. Default is current working directory.')
-def main(channel, database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None):
+def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None):
     if bustime_start is None:
         logging.error("Missing --bustime-start!")
         exit(1)
@@ -63,7 +62,7 @@ def main(channel, database="", host='0.0.0.0', port=8010, bustime_start=None, ba
         logging.error("Invalid --bustime-start!")
         exit(1)
 
-    app.segments_dir = os.path.join(base_dir, channel, "source")
+    app.segments_dir = base_dir
 
     app.db_manager = DBManager(dsn=database)

@@ -6,12 +6,6 @@
   margin-bottom: 1em;
 
-  div {
-    margin: 0;
-    padding: 0;
-    display: flex;
-  }
 
   label {
     display: inline-block;
     font-family: @sans-serif;
@@ -20,7 +14,7 @@
     padding: 0.2em;
   }
 
-#text_search_line {
+#text_search_line{
   display: flex;
   flex-direction: row;
@@ -35,18 +29,10 @@
 #time_search_line {
   display: flex;
   flex-direction: row;
-  flex-wrap: wrap;
 
-  div {
-    align-items: center;
-  }
-
-  input[type=datetime-local], input[type=text] {
+  input[type=datetime-local] {
     width: 13em;
-  }
-
-  input[type=text] {
-    text-align: right;
   }
 }
 
 #search_button {

@@ -9,27 +9,15 @@
 <body onload="onSiteLoad()">
 <div id="search_tools">
     <div id="text_search_line" class="form_line">
         <label for="search_text">Search</label> <input type="search" id="search_text" oninput="doSearch()"
                placeholder="Supports quotes, 'or' and -.">
     </div>
-    <div id="time_search_line" class="form_line">
-        <div><label for="start_time">Start time</label> <input id="start_time" type="datetime-local" autocomplete="off"></div>
-        <div><label for="end_time">End time</label> <input id="end_time" type="datetime-local" autocomplete="off"></div>
-        <div>
-            <label for="channel_select">Channel</label><select id="channel_select">
-            <option value="desertbus" selected>desertbus</option>
-            <option value="loadingreadyrun">loadingreadyrun</option>
-        </select>
-        </div>
-        <div>
-            <label>Time type</label>
-            <input type="radio" name="time_type" id="UTC_time_radio" oninput="switchToUTC()" checked autocomplete="off"><label for="UTC_time_radio">UTC Time</label>
-            <input type="radio" name="time_type" id="bus_time_radio" oninput="switchToBus()" autocomplete="off"><label for="bus_time_radio">Bus Time</label>
-        </div>
-        <button id="search_button" onclick="doSearch()" type="button">Search</button>
-    </div>
+    <div id="time_search_line" class="form_line">
+        <label for="start_time">Start time</label> <input id="start_time" type="datetime-local">
+        <label for="end_time">End time</label> <input id="end_time" type="datetime-local">
+        <button id="search_button" onclick="doSearch()" type="button">Search</button>
+    </div>
 </div>
 <div id="results">

@@ -60,20 +60,6 @@
     grid-column: text;
   }
 
-  .line_links {
-    text-align: right;
-    grid-column: times;
-
-    a {
-      margin-left: 0.5em;
-      font-size: small;
-      font-family: @sans-serif;
-      color: lightgray;
-      text-align: right;
-    }
-  }
 }
 
 .line.verified {

@@ -11,13 +11,11 @@ function onSiteLoad(e) {
 function query(text, start_time, end_time) {
     let query_string = ""
 
-    const time_type = document.getElementById("UTC_time_radio").checked ? "" : "bus_";
     if (start_time !== "") {
-        query_string += `${time_type}start_time=${start_time}`;
+        query_string += `start_time=${start_time}`;
     }
     if (end_time !== "") {
-        query_string += `&${time_type}end_time=${end_time}`;
+        query_string += `&end_time=${end_time}`;
     }
     if (text !== "") {
         query_string += `&query=${text}`
@@ -25,10 +23,9 @@ function query(text, start_time, end_time) {
     query_string += "&limit=30";
 
-    const channel = document.getElementById("channel_select").value;
-    fetch(`https://wubloader.raptorpond.com/buscribe/${channel}/json?${query_string}`)
+    fetch(`http://localhost:8010/buscribe/json?${query_string}`)
         .then(response => response.json())
+        // .then(response => console.log(response.error()))
         .then(fillResults)
 }
@@ -45,8 +42,6 @@ function fillResults(results) {
     const results_element = document.getElementById("results")
     results_element.innerHTML = ""
 
-    const channel = document.getElementById("channel_select").value;
-
     for (const line of results) {
         const line_div = document.createElement("div");
@@ -61,42 +56,9 @@ function fillResults(results) {
             <div class="line_speakers">${line.speakers == null ? "" : line.speakers.join(", ")}</div>
             <div class="line_start_time">${line.start_time}</div>
             <div class="line_text">${line.text}</div>
-            <div class="line_links">
-                <a href="/professor/professor.html?line=${line.id}">Edit</a>
-                <a href="javascript:showContext('${line.start_time}');">Show context</a>
-            </div>
         `;
 
         results_element.append(line_div)
     }
-}
-
-function switchToUTC() {
-    document.getElementById("start_time").type = "datetime-local";
-    document.getElementById("end_time").type = "datetime-local";
-}
-
-function switchToBus() {
-    document.getElementById("start_time").type = "text";
-    document.getElementById("end_time").type = "text";
-}
-
-function showContext(time) {
-    let start_time = new Date(time + "Z");
-    start_time.setMinutes(start_time.getMinutes() - 3);
-    let start_time_string = start_time.toISOString();
-    start_time_string = start_time_string.substring(0, start_time_string.length - 1)
-
-    let end_time = new Date(time + "Z");
-    end_time.setMinutes(end_time.getMinutes() + 3);
-    let end_time_string = end_time.toISOString();
-    end_time_string = end_time_string.substring(0, end_time_string.length - 1)
-
-    document.getElementById("start_time").value = start_time_string;
-    document.getElementById("end_time").value = end_time_string;
-    document.getElementById("search_text").value = "";
-
-    doSearch();
 }

@@ -1,9 +1,9 @@
-FROM debian:11
+FROM debian:latest
 
 RUN apt update &&\
     apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg
 
-COPY common /tmp/common
+COPY ../common /tmp/common
 RUN pip install /tmp/common && rm -r /tmp/common
 
 COPY buscribe /tmp/buscribe

@@ -84,7 +84,7 @@ def get_end_of_transcript(db_cursor):
     """Grab the end timestamp of the current transcript.
     If there is no existing transcript returns default; used for cold starts."""
-    db_cursor.execute("SELECT end_time FROM buscribe_transcriptions ORDER BY end_time DESC LIMIT 1")
+    db_cursor.execute("SELECT end_time FROM buscribe.public.buscribe_transcriptions ORDER BY end_time DESC LIMIT 1")
 
     end_of_transcript_row = db_cursor.fetchone()
     return end_of_transcript_row.end_time if end_of_transcript_row is not None else None
@@ -94,10 +94,9 @@ def finish_off_recognizer(recognizer: BuscribeRecognizer, db_cursor):
     """Flush the recognizer, commit the final line to the database and reset it."""
     final_result_json = json.loads(recognizer.final_result())  # Flush the tubes
 
-    if "result" in final_result_json:
-        line_start_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][0]["start"])
-        line_end_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][-1]["end"])
-        write_line(final_result_json, line_start_time, line_end_time, db_cursor)
+    line_start_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][0]["start"])
+    line_end_time = recognizer.segments_start_time + timedelta(seconds=final_result_json["result"][-1]["end"])
+
+    write_line(final_result_json, line_start_time, line_end_time, db_cursor)
 
     recognizer.reset()
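Vosk's `FinalResult()` can come back without word-level timestamps (for example after flushing silence), which is why one side of the hunk guards on `"result"` before indexing into it. A small self-contained sketch of that guard (`extract_bounds` is a hypothetical helper, not part of the codebase):

```python
import json

def extract_bounds(final_result: str):
    """Return (start, end) seconds from a Vosk final-result JSON string, or None if it is empty."""
    data = json.loads(final_result)
    if "result" not in data or not data["result"]:
        return None
    return data["result"][0]["start"], data["result"][-1]["end"]

print(extract_bounds('{"text": ""}'))  # None
print(extract_bounds('{"result": [{"word": "bus", "start": 1.2, "end": 1.8}], "text": "bus"}'))  # (1.2, 1.8)
```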

@@ -1,6 +1,6 @@
 import logging
 import os
-from datetime import timedelta, datetime, timezone
+from datetime import timedelta, datetime
 from time import sleep
 
 import argh
@@ -27,15 +27,13 @@ from buscribe.recognizer import BuscribeRecognizer
           help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, '
               'to prime the model. The transcripts for that time will not be written to the database. If not given '
               'transcription will start after last already transcribed line.')
-@argh.arg('--start-time-override',
-          help='Ignore database and force override the start time.')
 @argh.arg('--end-time',
           help='End of transcript. If not given continues to transcribe live.')
 @argh.arg('--base-dir',
           help='Directory from which segments will be grabbed. Default is current working directory.')
 def main(channel, database="", base_dir=".",
          model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
-         start_time=None, end_time=None, start_time_override=None):
+         start_time=None, end_time=None):
     SAMPLE_RATE = 48000
 
     segments_dir = os.path.join(base_dir, channel, "source")
@@ -46,27 +44,19 @@ def main(channel, database="", base_dir=".",
     logging.debug("Got database cursor.")
 
     logging.info("Figuring out starting time...")
-    db_start_time = get_end_of_transcript(db_cursor)
-
-    # ~~Database start time takes priority~~
-    # Overrride takes priority
-    if start_time_override is not None:
-        start_time = dateutil.parse(start_time_override)
-    elif db_start_time is not None:
-        start_time = db_start_time
-    elif start_time is not None:
-        start_time = dateutil.parse(start_time)
-    else:
-        # No start time argument AND no end of transcript (empty database)
-        logging.error("Couldn't figure out start time!")
-        db_conn.close()
-        exit(1)
-    logging.info("Start time: {}".format(start_time))
+    if start_time is not None:
+        start_time = dateutil.parse(start_time)
+    else:
+        start_time = get_end_of_transcript(db_cursor)
 
     if end_time is not None:
         end_time = dateutil.parse(end_time)
-    logging.info("End time: {}".format(end_time))
 
+    # No start time argument AND no end of transcript (empty database)
+    if start_time is None:
+        logging.error("Couldn't figure out start time!")
+        db_conn.close()
+        exit(1)
 
     logging.info("Loading models...")
     recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
@@ -87,34 +77,15 @@ def main(channel, database="", base_dir=".",
     gevent.signal_handler(signal.SIGTERM, stop)
 
-    while start_time < end_time:
+    while True:
         # If end time isn't given, use current time (plus fudge) to get a "live" segment list
         segments = common.get_best_segments(segments_dir,
                                             start_time,
-                                            end_time if end_time is not None else
-                                            datetime.utcnow() + timedelta(minutes=2))
-
-        # If there is a hole at the start of the requested range because
-        if segments[0] is None:
-            # The hole is older than a minute, therefore
-            # - reset recognizer
-            # - continue from existing segments
-            if datetime.utcnow() - start_time > timedelta(minutes=1):
-                finish_off_recognizer(recognizer, db_cursor)
-
-            # If the hole is less than a minute old, or if we don't have new segments: wait for segments
-            if datetime.utcnow() - start_time <= timedelta(minutes=1) or \
-                    segments == [None]:
-                logging.info("Waiting for new or backfilled segments.")
-                sleep(30)
-                continue  # Retry
-
-        # Remove initial None segment (indicating segments start time is after desired start time) if it exists
-        if segments[0] is None:
+                                            end_time if end_time is not None else datetime.now() + timedelta(minutes=2))
+        # Remove initial None segment if it exists
+        if segments[0] is None:
             segments = segments[1:]
 
-        # Recognizer is fresh or was reset
         if recognizer.segments_start_time is None:
             recognizer.segments_start_time = segments[0].start
             logging.info(f"Starting from: {segments[0].start}")
@@ -128,5 +99,14 @@ def main(channel, database="", base_dir=".",
             finish_off_recognizer(recognizer, db_cursor)
             db_conn.close()
             exit(0)
+        elif datetime.now() - segments_end_time > timedelta(minutes=5):
+            # Last seen segment ended more than five minutes ago. We hit a gap that will likely stay unfilled.
+            # Reset and jump to the other end of the gap.
+            finish_off_recognizer(recognizer, db_cursor)
+        else:
+            # End of live segment or a gap that is not old and might get filled.
+            # Give it a bit of time and continue.
+            # Note: if the gap is not filled within 30s, we jump to the next available segment.
+            sleep(30)
 
         start_time = segments_end_time
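The two sides encode different gap policies: the removed branch reacts to a hole at the start of the requested range (reset on an old hole, sleep and retry on a young one or when only the hole is available), while the added branch keys off how long ago the last segment ended. A distilled, self-contained sketch of the removed branch's decision (`gap_actions` is a hypothetical helper for illustration only):

```python
from datetime import datetime, timedelta, timezone

def gap_actions(segments, start_time, now):
    """Mirror of the removed branch: what to do when the first requested segment is missing."""
    actions = []
    if now - start_time > timedelta(minutes=1):
        actions.append("reset recognizer")          # the hole is old: flush the pending line
    if segments == [None] or now - start_time <= timedelta(minutes=1):
        actions.append("sleep 30s and retry")       # young hole, or nothing but the hole yet
    else:
        actions.append("continue from existing segments")
    return actions

now = datetime(2023, 11, 18, 12, 5, tzinfo=timezone.utc)
print(gap_actions([None], now - timedelta(minutes=10), now))          # reset, then wait
print(gap_actions([None, "seg"], now - timedelta(minutes=10), now))   # reset, then continue
print(gap_actions([None, "seg"], now - timedelta(seconds=30), now))   # just wait
```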

@@ -7,10 +7,8 @@ setup(
     install_requires = [
         "argh",
         "psycopg2",
-        #"gevent==1.5a2",
-        "gevent",
-        #"greenlet==0.4.16",
-        "greenlet",
+        "gevent==1.5a2",
+        "greenlet==0.4.16",
         "psycogreen",
         "wubloader-common",
         "python-dateutil",

@@ -49,9 +49,9 @@ CREATE TABLE buscribe_verifiers
 );
 
 -- For testing
--- INSERT INTO buscribe_verifiers(email, name)
--- VALUES ('placeholder@example.com', 'Place Holder'),
--- ('aguy@example.com', 'Arnold Guyana');
+INSERT INTO buscribe_verifiers(email, name)
+VALUES ('placeholder@example.com', 'Place Holder'),
+       ('aguy@example.com', 'Arnold Guyana');
 
 CREATE TABLE buscribe_line_speakers
 (
@@ -62,13 +62,6 @@ CREATE TABLE buscribe_line_speakers
     PRIMARY KEY (line, speaker, verifier)
 );
 
-CREATE TABLE buscribe_line_inferred_speakers
-(
-    line    BIGINT NOT NULL REFERENCES buscribe_transcriptions,
-    speaker BIGINT NOT NULL REFERENCES buscribe_speakers,
-    PRIMARY KEY (line, speaker)
-);
-
 CREATE TABLE buscribe_verified_lines
 (
     -- id BIGSERIAL PRIMARY KEY,
@@ -90,13 +83,12 @@ CREATE VIEW buscribe_all_transcriptions AS
 SELECT buscribe_transcriptions.id,
        start_time,
        end_time,
        coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
        names,
-       coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line,
-       coalesce(setweight(to_tsvector('english', verified_line), 'C'),
-                to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts,
-       setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
-       null AS transcription_json
+       verified_line AS transcription_line,
+       setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts,
+       setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
+       null AS transcription_json
 FROM buscribe_transcriptions
          LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
          LEFT OUTER JOIN (
@@ -114,66 +106,15 @@ SELECT id,
        start_time,
        end_time,
        null AS verifier,
-       names,
+       null AS names,
        transcription_line,
        to_tsvector('english', transcription_line) AS transcription_line_ts,
        null AS names_ts,
        transcription_json
-FROM buscribe_transcriptions
-         LEFT OUTER JOIN (
-    SELECT line, array_agg(name) AS names
-    FROM buscribe_line_inferred_speakers
-             INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
-    GROUP BY line
-) AS speakers ON id = speakers.line;
+FROM buscribe_transcriptions;
 
 ROLLBACK;
CREATE VIEW buscribe_all_transcriptions2 AS
SELECT buscribe_transcriptions.id,
start_time,
end_time,
coalesce(buscribe_verified_lines.verifier, speakers.verifier) AS verifier,
names,
coalesce(verified_line, buscribe_transcriptions.transcription_line) AS transcription_line,
to_tsvector('english', buscribe_transcriptions.transcription_line) AS machine_line_ts,
setweight(to_tsvector('english', verified_line), 'C') AS verified_line_ts,
coalesce(setweight(to_tsvector('english', verified_line), 'C'),
to_tsvector('english', buscribe_transcriptions.transcription_line)) AS transcription_line_ts,
setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
null AS transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN (
SELECT line, verifier, array_agg(name) AS names
FROM buscribe_line_speakers
INNER JOIN buscribe_speakers ON buscribe_line_speakers.speaker = buscribe_speakers.id
GROUP BY line, verifier
) AS speakers ON buscribe_transcriptions.id = speakers.line AND (
speakers.verifier = buscribe_verified_lines.verifier OR
buscribe_verified_lines.verifier IS NULL
)
WHERE coalesce(buscribe_verified_lines.verifier, speakers.verifier) IS NOT NULL
UNION
SELECT id,
start_time,
end_time,
null AS verifier,
names,
transcription_line,
to_tsvector('english', transcription_line) AS machine_line_ts,
null AS verified_line_ts,
to_tsvector('english', transcription_line) AS transcription_line_ts,
null AS names_ts,
transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN (
SELECT line, array_agg(name) AS names
FROM buscribe_line_inferred_speakers
INNER JOIN buscribe_speakers ON buscribe_line_inferred_speakers.speaker = buscribe_speakers.id
GROUP BY line
) AS speakers ON id = speakers.line;
-- Convert last lexeme in a query to prefix query.
CREATE FUNCTION convert_query(query_text text) RETURNS tsquery AS
$$
@@ -182,4 +123,4 @@ DECLARE
BEGIN
    RETURN (CASE WHEN ws_query != '' THEN ws_query || ':*' ELSE '' END)::tsquery;
END;
$$ LANGUAGE plpgsql;
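`convert_query` turns the final lexeme of the user's search into a prefix match so partially typed words still hit. A rough Python analogue of that intent, for illustration only (the real tsquery construction lives in the SQL function above, whose body is only partly shown in this hunk):

```python
def to_prefix_query(query_text: str) -> str:
    """Approximate convert_query: AND all words together, with the last one as a prefix match."""
    words = query_text.split()
    if not words:
        return ""
    terms = [f"'{w}'" for w in words[:-1]] + [f"'{words[-1]}':*"]
    return " & ".join(terms)

print(to_prefix_query("desert bu"))  # 'desert' & 'bu':*
```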

@@ -1,130 +0,0 @@
version: "3"
services:
buscribenginx:
image: buscribe-web:0.0.0
ports:
- "8020:80"
volumes:
- /srv/wubloader/segments:/usr/share/nginx/html/segments
networks:
- default
- wubloader_default
- traefik_network
labels:
- "traefik.docker.network=traefik_network"
- "traefik.http.routers.buscribe-router.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.entrypoints=web"
- "traefik.http.routers.buscribe-router.tls=true"
- "traefik.http.routers.buscribe-router.tls.certresolver=leresolver"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.scheme=https"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.permanent=true"
- "traefik.http.routers.buscribe-redirect.middlewares=buscribe-redirectscheme@docker"
restart: "on-failure"
# buscribelrr:
# image: buscribe:0.0.0
# command: [ "loadingreadyrun",
# "--start-time=2022-11-11T12:00:00Z",
# "--end-time=2022-11-20T22:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
# buscribedb:
# image: buscribe:0.0.0
# command: [ "desertbus",
# "--start-time=2023-11-10T12:00:00Z",
# "--end-time=2023-11-15T00:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
buscribedb0:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-19T00:00:00Z",
"--end-time=2023-11-19T06:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb1:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T06:00:00Z",
"--end-time=2023-11-18T12:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb2:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T12:00:00Z",
"--end-time=2023-11-18T18:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb3:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T18:00:00Z",
"--end-time=2023-11-19T00:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
# buscribeapilrr:
# image: buscribe-api:0.0.0
# command: [
# "loadingreadyrun",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--bustime-start=2023-11-11T22:00:00Z" ]
buscribeapidb:
image: buscribe-api:0.0.0
command: [
"desertbus",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
volumes:
- /srv/wubloader/segments:/mnt
professorapidb:
image: professor-api:0.0.0
command: [
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
postgres:
image: postgres:13
ports:
- "7654:5432"
environment:
- POSTGRES_USER=vst
- POSTGRES_DB=postgres
- POSTGRES_PASSWORD=flnMSYPRf
volumes:
- /srv/buscribe/postgres:/var/lib/postgresql/data
restart: "unless-stopped"
postgres-prometheus:
image: quay.io/prometheuscommunity/postgres-exporter
ports:
- "9187:9187"
environment:
- DATA_SOURCE_NAME=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr?sslmode=disable
networks:
wubloader_default:
external: true
traefik_network:
external: true

@@ -1,5 +0,0 @@
FROM node:17-alpine
RUN npm install less -g
ENTRYPOINT ["lessc"]

@@ -1,5 +0,0 @@
FROM nginx:latest
COPY buscribe-web /usr/share/nginx/html/buscribe
COPY professor /usr/share/nginx/html/professor
COPY nginx/nginx.conf /etc/nginx/nginx.conf

@@ -1,57 +0,0 @@
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
gzip on;
gzip_comp_level 9;
absolute_redirect off;
server {
listen 80;
server_name localhost;
#access_log /var/log/nginx/host.access.log main;
location / { proxy_pass http://nginx; }
location /buscribelrr {
alias /usr/share/nginx/html/buscribe;
}
location /buscribe {
alias /usr/share/nginx/html/buscribe;
}
location /professor {
alias /usr/share/nginx/html/professor;
}
#location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; }
location /buscribe/desertbus/json { proxy_pass http://buscribeapidb:8010/buscribe/json; }
location /professor/desertbus { proxy_pass http://professorapidb:8011/professor; }
}
}

@@ -13,10 +13,10 @@ from professor_api.professor_api import app
 def cors(app):
     """WSGI middleware that sets CORS headers"""
     HEADERS = [
-        ("Access-Control-Allow-Credentials", "true"),
-        ("Access-Control-Allow-Headers", "content-type"),
+        ("Access-Control-Allow-Credentials", "false"),
+        ("Access-Control-Allow-Headers", "*"),
         ("Access-Control-Allow-Methods", "GET,HEAD,POST,PUT"),
-        ("Access-Control-Allow-Origin", "http://localhost:63342,https://wubloader.raptorpond.com"),
+        ("Access-Control-Allow-Origin", "*"),
         ("Access-Control-Expose-Headers", "*"),
         ("Access-Control-Max-Age", "86400"),
     ]
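The hunk only shows the header list, but a `cors(app)` wrapper of this shape is typically a thin WSGI middleware that appends those headers to every response. A minimal sketch under that assumption (not the project's actual implementation):

```python
def cors(app, headers):
    """Wrap a WSGI app and append CORS headers to every response."""
    def wrapped(environ, start_response):
        def start_response_with_cors(status, response_headers, exc_info=None):
            return start_response(status, response_headers + headers, exc_info)
        return app(environ, start_response_with_cors)
    return wrapped

HEADERS = [
    ("Access-Control-Allow-Origin", "*"),
    ("Access-Control-Allow-Methods", "GET,HEAD,POST,PUT"),
]

def demo_app(environ, start_response):
    start_response("200 OK", [("Content-Type", "text/plain")])
    return [b"ok"]

app = cors(demo_app, HEADERS)
```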
@@ -45,7 +45,7 @@ def servelet(server):
               'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
 @argh.arg('--bustime-start',
           help='The start time in UTC for the event, for UTC-Bustime conversion')
-def main(database="", host='0.0.0.0', port=8011, bustime_start=None):
+def main(database="", host='0.0.0.0', port=8005, bustime_start=None):
     if bustime_start is None:
         logging.error("Missing --bustime-start!")
         exit(1)

@@ -1,7 +1,5 @@
 import re
 import urllib.parse
-from functools import wraps
-from random import randrange
 
 import flask
 import gevent
@@ -10,51 +8,9 @@ from flask import jsonify, request, copy_current_request_context
 from gevent import sleep
 from psycopg2.extras import execute_values
-from google.oauth2 import id_token
-from google.auth.transport import requests
 
 app = flask.Flask('buscribe')
def authenticate(f):
"""Authenticate a token against the database.
Reference: https://developers.google.com/identity/sign-in/web/backend-auth
https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#using-a-google-api-client-library"""
@wraps(f)
def auth_wrapper(*args, **kwargs):
try:
user_token = request.cookies.get("credentials")
print(user_token)
except (KeyError, TypeError):
return 'User token required', 401
try:
idinfo = id_token.verify_oauth2_token(user_token, requests.Request(),
"164084252563-kaks3no7muqb82suvbubg7r0o87aip7n.apps.googleusercontent.com")
if idinfo['iss'] not in ['accounts.google.com', 'https://accounts.google.com']:
raise ValueError('Wrong issuer.')
except ValueError:
return 'Invalid token. Access denied.', 403
# check whether user is in the database
email = idinfo['email'].lower()
conn = app.db_manager.get_conn()
results = database.query(conn, """
SELECT email
FROM buscribe_verifiers
WHERE lower(email) = %s""", email)
row = results.fetchone()
if row is None:
return 'Unknown user. Access denied.', 403
return f(*args, editor=email, **kwargs)
return auth_wrapper
@app.route('/professor/line/<int:line_id>', methods=["GET"])
def get_line(line_id):
    db_conn = app.db_manager.get_conn()
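The removed `authenticate` decorator above verifies the `credentials` cookie as a Google ID token and then checks the email against `buscribe_verifiers` before injecting it as `editor`. A standalone sketch of just the token-verification step with google-auth (the client ID is the one from the diff; `verify_credentials` is a hypothetical helper, not part of the codebase):

```python
from google.oauth2 import id_token
from google.auth.transport import requests

CLIENT_ID = "164084252563-kaks3no7muqb82suvbubg7r0o87aip7n.apps.googleusercontent.com"

def verify_credentials(token: str) -> str:
    """Return the lower-cased email from a valid Google ID token; raise ValueError otherwise."""
    idinfo = id_token.verify_oauth2_token(token, requests.Request(), CLIENT_ID)
    if idinfo['iss'] not in ('accounts.google.com', 'https://accounts.google.com'):
        raise ValueError('Wrong issuer.')
    return idinfo['email'].lower()
```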
@@ -64,27 +20,7 @@ def get_line(line_id):
     if line is None:
         return "Line not found.", 404
     else:
-        return {"id": line.id,
-                "start_time": line.start_time.isoformat(),
-                "end_time": line.end_time.isoformat(),
-                "line_data": line.transcription_json}
-
-
-@app.route('/professor/line/random', methods=["GET"])
-def get_random_line():
-    db_conn = app.db_manager.get_conn()
-
-    n_lines = database.query(db_conn, "SELECT count(*) AS n_lines FROM buscribe_transcriptions;").fetchone().n_lines
-    row = randrange(n_lines)
-
-    line = database.query(db_conn, "SELECT * FROM buscribe_transcriptions OFFSET %(row)s LIMIT 1;", row=row).fetchone()
-
-    if line is None:
-        return "Line not found.", 404
-    else:
-        return {"id": line.id,
-                "start_time": line.start_time.isoformat(),
+        return {"start_time": line.start_time.isoformat(),
                 "end_time": line.end_time.isoformat(),
                 "line_data": line.transcription_json}
@@ -106,13 +42,12 @@ def get_playlist(line_id):
 #EXT-X-TARGETDURATION:{duration.total_seconds()}
 #EXT-X-PROGRAM-DATE-TIME:{start_time_iso}
 #EXTINF:{duration.total_seconds()}
-/cut/desertbus/source.ts?start={urllib.parse.quote_plus(start_time_iso)}&end={urllib.parse.quote_plus(end_time_iso)}&type=rough&allow_holes=true
+//localhost/cut/desertbus/source.ts?start={urllib.parse.quote_plus(start_time_iso)}&end={urllib.parse.quote_plus(end_time_iso)}&type=rough&allow_holes=true
 #EXT-X-ENDLIST"""
 
 
 @app.route('/professor/line/<int:line_id>', methods=["POST"])
-@authenticate
-def update_line(line_id, editor):
+def update_line(line_id):
     db_conn = app.db_manager.get_conn()
 
     if "speakers" in request.json and \
@@ -121,11 +56,11 @@ def update_line(line_id, editor):
         # Simpler than dealing with uniqueness
         database.query(db_conn,
                        "DELETE FROM buscribe_line_speakers WHERE line = %(line_id)s AND verifier = %(verifier)s;",
-                       line_id=line_id, verifier=editor)
+                       line_id=line_id, verifier="placeholder@example.com")
         execute_values(db_conn.cursor(),
                        "INSERT INTO buscribe_line_speakers(line, speaker, verifier) "
                        "VALUES %s;",
-                       [(line_id, speaker, editor) for speaker in
+                       [(line_id, speaker, "placeholder@example.com") for speaker in
                         request.json["speakers"]])
 
     if "transcription" in request.json and \
             isinstance(request.json["transcription"], str) and \
@@ -135,11 +70,11 @@ def update_line(line_id, editor):
         database.query(db_conn,
                        "DELETE FROM buscribe_verified_lines WHERE line = %(line_id)s AND verifier = %(verifier)s;",
-                       line_id=line_id, verifier=editor)
+                       line_id=line_id, verifier="placeholder@example.com")
         database.query(db_conn,
                        "INSERT INTO buscribe_verified_lines(line, verified_line, verifier) "
                        "VALUES (%(line)s, %(verified_line)s, %(verifier)s)",
-                       line=line_id, verified_line=verified_line, verifier=editor)
+                       line=line_id, verified_line=verified_line, verifier="placeholder@example.com")
 
     return "", 204
@@ -166,8 +101,7 @@ def get_speaker(speaker_id):
 
 @app.route('/professor/speaker', methods=["PUT"])
-@authenticate
-def new_speaker(editor=None):
+def new_speaker():
     name = request.json
 
     if not isinstance(name, str):

@@ -11,7 +11,6 @@ setup(
         "psycogreen",
         "wubloader-common",
         "python-dateutil",
-        "flask",
-        "google-auth"
+        "flask"
     ],
 )

File diff suppressed because one or more lines are too long

@@ -5,6 +5,7 @@
     <title>Buscribe -- Professor</title>
 
     <link href="video.js/dist/video-js.min.css" rel="stylesheet">
+    <!-- <link href="videojs-hls-quality-selector/dist/videojs-hls-quality-selector.css" rel="stylesheet">-->
     <link href="jquery-ui-1.13.0.custom/jquery-ui.css" rel="stylesheet">
     <link href="style.css" rel="stylesheet">
@@ -12,10 +13,12 @@
     <script src="jquery-ui-1.13.0.custom/external/jquery/jquery.js"></script>
     <script src="jquery-ui-1.13.0.custom/jquery-ui.js"></script>
-    <script src="hotkeys.min.js"></script>
     <script src="script.js"></script>
+    <!-- <script src="videojs-contrib-quality-levels/dist/videojs-contrib-quality-levels.min.js"></script>-->
+    <!-- <script src="videojs-hls-quality-selector/dist/videojs-hls-quality-selector.min.js"></script>-->
 </head>
 <body onload="pageReady()">
@@ -43,13 +46,7 @@
 <button id="submit_button" onclick="submit()" type="button">Submit</button><span id="update_indicator"></span>
 
-<div id="googleLoginButton" style="display: none"></div>
-<div id="logout" style="display: none"><a href="javascript:doLogout()">Log out</a></div>
-
 <script src="video.js/dist/video.min.js"></script>
-<script src="https://accounts.google.com/gsi/client" async defer></script>
-<script>
-    window.onGoogleLibraryLoad = doGoogle
-</script>
 </body>
 </html>

@@ -1,13 +1,7 @@
 function pageReady() {
     const params = new URLSearchParams(document.location.search.substring(1));
-    let line_id;
-    if (params.get("line") !== "random") {
-        line_id = parseInt(params.get("line"), 10);
-    } else {
-        line_id = "random"
-    }
+    line_id = parseInt(params.get("line"), 10);
 
     videojs("player", {
         // src: "test.m3u8",
@@ -33,61 +27,15 @@ function pageReady() {
     const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select');
     bgOpacitySelector.value = "0.5"
 
-    fetch(`/professor/desertbus/line/${line_id}`)
+    fetch(`//localhost:8005/professor/line/${line_id}`)
         .then(response => response.json())
        .then(fillLineInfo)
        .then(initializePlayer);
-
-    handleLoginState();
-}
hotkeys('ctrl+enter', function (event, handler){
document.getElementById("submit_button").click();
});
function handleLoginState() {
if (document.cookie.split('; ').find(row => row.startsWith('credentials='))) {
document.getElementById("logout").style.display = "";
} else {
document.getElementById("googleLoginButton").style.display = "";
}
}
function doGoogle() {
google.accounts.id.initialize({
client_id: "164084252563-kaks3no7muqb82suvbubg7r0o87aip7n.apps.googleusercontent.com",
callback: loggedIn,
auto_select: true
});
google.accounts.id.renderButton(
document.getElementById("googleLoginButton"),
{theme: "outline", size: "large"} // customization attributes
);
google.accounts.id.prompt(); // also display the One Tap dialog
}
function doLogout() {
document.cookie = `credentials=;expires=Thu, 01 Jan 1970 00:00:01 GMT`;
document.getElementById("googleLoginButton").style.display = "";
document.getElementById("logout").style.display = "none";
}
function loggedIn(response) {
document.cookie = `credentials=${response.credential}`;
document.getElementById("googleLoginButton").style.display = "none";
document.getElementById("logout").style.display = "";
console.log(response);
 }
 
 function fillLineInfo(line_json) {
-    line_id = line_json.id
+    // document.getElementById("original_transcription").innerText = line_json.line_data.text;
     line = line_json
     document.getElementById("original_transcription").innerHTML = line_json.line_data.result
         .map(word => `<span style="opacity: ${word.conf}">${word.word}</span>`).join(" ");
@@ -97,12 +45,11 @@ function fillLineInfo(line_json) {
 function initializePlayer() {
     videojs.getPlayer("player").src([
-        //{src: `/professor/desertbus/line/${line_id}/playlist.m3u8`}
-        {src: `/playlist/desertbus/source.m3u8?start=${line.start_time}&end=${line.end_time}`}
+        {src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}
     ]);
     videojs.getPlayer("player").addRemoteTextTrack({
         kind: "captions",
-        src: `/buscribe/desertbus/vtt?start_time=${line.start_time}&end_time=${line.end_time}`,
+        src: `//localhost:8010/buscribe/vtt?start_time=${line.start_time}&end_time=${line.end_time}`,
         srclang: "en",
         label: "English",
         default: true
@@ -126,28 +73,26 @@ async function submit() {
             }
         }
 
-        return await fetch("/professor/desertbus/speaker",
+        return await fetch("//localhost:8005/professor/speaker",
             {
                 method: "PUT",
                 headers: {
                     'Content-Type': 'application/json'
                 },
-                body: JSON.stringify(speaker),
-                credentials: "include"
+                body: JSON.stringify(speaker)
             }).then(response =>
             parseInt(response.headers.get("Content-Location")
                 .split("/")
                 .pop(), 10));
     }));
 
-    fetch(`/professor/desertbus/line/${line_id}`,
+    fetch(`//localhost:8005/professor/line/${line_id}`,
         {
             method: "POST",
             headers: {
                 'Content-Type': 'application/json'
             },
-            body: JSON.stringify({transcription: new_transcription, speakers: new_speakers}),
-            credentials: "include"
+            body: JSON.stringify({transcription: new_transcription, speakers: new_speakers})
         }).then(response => {
         if (response.ok) {
             document.getElementById("update_indicator").innerText = "\u2714\ufe0f"
@@ -158,7 +103,7 @@ async function submit() {
 }
 
 $(function () {
-    fetch("/professor/desertbus/speaker")
+    fetch("//localhost:8005/professor/speaker")
         .then(response => response.json())
         .then(function (speakers_json) {
             speakers = speakers_json;
@@ -207,17 +152,4 @@ $(function () {
            }
        )
});
function parseJwt(token) {
const base64Url = token.split('.')[1];
const base64 = base64Url.replace(/-/g, '+').replace(/_/g, '/');
const jsonPayload = decodeURIComponent(
atob(base64)
.split('')
.map(function (c) {
return '%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2);
}).join(''));
return JSON.parse(jsonPayload);
}

@@ -61,10 +61,3 @@ button {
 span.verified_cc {
     color: #c1ffc1;
 }
-
-#logout {
-    padding: 0.1em;
-
-    a {
-        color: darkgray
-    }
-}

@@ -1,12 +0,0 @@
#!/bin/bash
docker run \
--rm \
-v /srv/wubloader/segments/:/mnt/ \
buscribe:0.0.0 \
loadingreadyrun \
--start-time='2021-11-05T00:00' \
--end-time='2021-11-07T00:00' \
--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr \
--model=/usr/share/buscribe/vosk-model-en-us-0.22/
# --model=/usr/share/buscribe/vosk-model-small-en-us-0.15/