From aa7871b5f01e150649857fb00a7c0f414e86e396 Mon Sep 17 00:00:00 2001
From: HeNine <>
Date: Sat, 23 Oct 2021 11:29:44 +0200
Subject: [PATCH] Rough cut compatible captions
---
buscribe-api/buscribeapi/buscribeapi.py | 17 ++++++++++++--
buscribe-api/buscribeapi/main.py | 6 ++++-
buscribe-api/templates/busub.jinja | 12 ++++++++--
buscribe-api/templates/busubs.jinja | 4 +++-
buscribe_data.sql | 6 +++--
professor/script.js | 30 +++++++++++++++++++++----
professor/style.less | 6 ++++-
7 files changed, 68 insertions(+), 13 deletions(-)
diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py
index 74d7f7b..e7efc4f 100644
--- a/buscribe-api/buscribeapi/buscribeapi.py
+++ b/buscribe-api/buscribeapi/buscribeapi.py
@@ -2,6 +2,7 @@ import json
from datetime import timedelta
import flask as flask
+import common
from common import dateutil, database
from dateutil.parser import ParserError
from flask import request, jsonify, Response, render_template
@@ -11,9 +12,16 @@ app = flask.Flask('buscribe')
@app.template_filter()
def convert_vtt_timedelta(delta: timedelta):
+ """Converts a timedelta to a VTT compatible format."""
return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}.{delta.microseconds // 1000:03}'
+@app.template_filter()
+def create_seconds_timedelta(seconds):
+ """Converts a float of seconds to a timedelta."""
+ return timedelta(seconds=seconds)
+
+
def round_bus_time(delta: timedelta):
"""Round bus time down to the second."""
return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}'
@@ -44,10 +52,15 @@ def get_vtt():
db_conn = app.db_manager.get_conn()
+ segments = common.get_best_segments(app.segments_dir,
+ start_time,
+ end_time)
+ segments_start_time = segments[0].start
+
results = fetch_lines(db_conn, start_time, end_time)
return Response(
- render_template("busubs.jinja", results=results, bustime_start=app.bustime_start,
+ render_template("busubs.jinja", results=results, start_time=segments_start_time,
duration_extend=timedelta(seconds=0.3)),
mimetype="text/vtt"
)
@@ -102,7 +115,7 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query = "SELECT *" + \
(
",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel='''', StopSel=') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \
- " FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s "
+ " FROM buscribe_all_transcriptions WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s "
if ts_query is not None:
query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \
diff --git a/buscribe-api/buscribeapi/main.py b/buscribe-api/buscribeapi/main.py
index 62ed766..87568c2 100644
--- a/buscribe-api/buscribeapi/main.py
+++ b/buscribe-api/buscribeapi/main.py
@@ -47,7 +47,9 @@ def servelet(server):
'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start',
help='The start time in UTC for the event, for UTC-Bustime conversion')
-def main(database="", host='0.0.0.0', port=8010, bustime_start=None):
+@argh.arg('--base-dir',
+ help='Directory from which segments will be grabbed. Default is current working directory.')
+def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None):
if bustime_start is None:
logging.error("Missing --bustime-start!")
exit(1)
@@ -60,6 +62,8 @@ def main(database="", host='0.0.0.0', port=8010, bustime_start=None):
logging.error("Invalid --bustime-start!")
exit(1)
+ app.segments_dir = base_dir
+
app.db_manager = DBManager(dsn=database)
stopping = gevent.event.Event()
diff --git a/buscribe-api/templates/busub.jinja b/buscribe-api/templates/busub.jinja
index 51b41be..e585d8b 100644
--- a/buscribe-api/templates/busub.jinja
+++ b/buscribe-api/templates/busub.jinja
@@ -1,2 +1,10 @@
-{{ (row.start_time - bustime_start - duration_extend)|convert_vtt_timedelta }} --> {{ (row.end_time - bustime_start + duration_extend)|convert_vtt_timedelta }}
-- {{ row.transcription_line }}
+{{ (row.start_time - start_time)|convert_vtt_timedelta }} --> {{ (row.end_time - start_time + duration_extend)|convert_vtt_timedelta }}
+
+ {%- if row.transcription_json is none -%}
+ {{ row.transcription_line }}
+ {%- else -%}
+ {%- set line_start_time = row.transcription_json["result"][0]["start"] -%}
+ {%- for word in row.transcription_json["result"] -%}
+ <{{ ((row.start_time - start_time) + (word["start"] - line_start_time)|create_seconds_timedelta)|convert_vtt_timedelta }}>{{ word["word"] + " " }}
+ {%- endfor -%}
+ {%- endif -%}
\ No newline at end of file
diff --git a/buscribe-api/templates/busubs.jinja b/buscribe-api/templates/busubs.jinja
index 258e009..0d4a47a 100644
--- a/buscribe-api/templates/busubs.jinja
+++ b/buscribe-api/templates/busubs.jinja
@@ -1,5 +1,7 @@
WEBVTT
{% for row in results %}
-{% include "busub.jinja" %}
+ {%- if row.transcription_line is not none %}
+ {%- include "busub.jinja" %}
+ {%- endif %}
{% endfor %}
\ No newline at end of file
diff --git a/buscribe_data.sql b/buscribe_data.sql
index b06607f..b2a4ff4 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -87,7 +87,8 @@ SELECT buscribe_transcriptions.id,
names,
verified_line AS transcription_line,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts,
- setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts
+ setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
+ null AS transcription_json
FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN (
@@ -108,7 +109,8 @@ SELECT id,
null AS names,
transcription_line,
to_tsvector('english', transcription_line) AS transcription_line_ts,
- null AS names_ts
+ null AS names_ts,
+ transcription_json
FROM buscribe_transcriptions;
ROLLBACK;
diff --git a/professor/script.js b/professor/script.js
index 98fce30..3a42ba8 100644
--- a/professor/script.js
+++ b/professor/script.js
@@ -8,7 +8,7 @@ function pageReady() {
controls: true,
autoplay: false,
width: 900,
- height: 420,
+ height: 900 / 16 * 9,
playbackRates: [0.5, 1, 1.25, 1.5, 2],
inactivityTimeout: 0,
controlBar: {
@@ -16,24 +16,46 @@ function pageReady() {
volumePanel: {
inline: false,
},
- },
- sources: [{src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}]
+ }
});
+ // this changes the background color to black
+ const bgColorSelector = document.querySelector('.vjs-bg-color > select');
+ bgColorSelector.value = "#000";
+
+ // this changes the background opacity to 0.5
+ const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select');
+ bgOpacitySelector.value = "0.5"
+
fetch(`//localhost:8005/professor/line/${line_id}`)
.then(response => response.json())
- .then(fillLineInfo);
+ .then(fillLineInfo)
+ .then(initializePlayer);
}
function fillLineInfo(line_json) {
// document.getElementById("original_transcription").innerText = line_json.line_data.text;
+ line = line_json
document.getElementById("original_transcription").innerHTML = line_json.line_data.result
.map(word => `${word.word}`).join(" ");
document.getElementById("new_transcription")
.attributes.getNamedItem("placeholder").value = line_json.line_data.text;
}
+function initializePlayer() {
+ videojs.getPlayer("player").src([
+ {src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}
+ ]);
+ videojs.getPlayer("player").addRemoteTextTrack({
+ kind: "captions",
+ src: `//localhost:8010/buscribe/vtt?start_time=${line.start_time}&end_time=${line.end_time}`,
+ srclang: "en",
+ label: "English",
+ default: true
+ }, false);
+}
+
async function submit() {
document.getElementById("update_indicator").innerText = "⭯"
diff --git a/professor/style.less b/professor/style.less
index b6a5093..def9a0d 100644
--- a/professor/style.less
+++ b/professor/style.less
@@ -56,4 +56,8 @@ button {
height: 1.3em;
margin-left: 1em;
vertical-align: middle;
-}
\ No newline at end of file
+}
+
+span.verified_cc {
+ color: #c1ffc1;
+}