Rough cut compatible captions

multichannel
HeNine 3 years ago
parent 0fc51b71fe
commit aa7871b5f0

@ -2,6 +2,7 @@ import json
from datetime import timedelta from datetime import timedelta
import flask as flask import flask as flask
import common
from common import dateutil, database from common import dateutil, database
from dateutil.parser import ParserError from dateutil.parser import ParserError
from flask import request, jsonify, Response, render_template from flask import request, jsonify, Response, render_template
@ -11,9 +12,16 @@ app = flask.Flask('buscribe')
@app.template_filter()
def convert_vtt_timedelta(delta: timedelta):
    """Convert a timedelta to a WebVTT timestamp (``HH:MM:SS.mmm``).

    The hour field is zero-padded to two digits and grows beyond that for
    offsets of 100 hours or more. Sub-millisecond precision is truncated.

    A negative ``delta`` is clamped to ``00:00:00.000``: a normalised negative
    timedelta has negative ``days`` but non-negative ``seconds``, so formatting
    its fields directly produces a malformed value such as ``-1:59:59.700``.

    NOTE(review): clamping (rather than raising) is a choice made here so a
    single out-of-range cue cannot corrupt the whole file -- confirm.
    """
    # Work in whole milliseconds so every field derives from one integer.
    total_ms = (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000
    total_ms = max(total_ms, 0)

    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f'{hours:02}:{minutes:02}:{seconds:02}.{milliseconds:03}'
@app.template_filter()
def create_seconds_timedelta(seconds):
    """Build a timedelta spanning the given number of seconds.

    Registered as a template filter so templates can turn a numeric offset
    in seconds into a timedelta.
    """
    span = timedelta(seconds=seconds)
    return span
def round_bus_time(delta: timedelta) -> str:
    """Round bus time down to the second and format it as ``HH:MM:SS``.

    Hours are zero-padded to two digits and are not wrapped at 24, so a delta
    of one day and one hour renders as ``25:00:00``.

    Negative deltas are rendered with a leading ``-`` followed by the
    magnitude (``-00:00:01`` for anything in ``[-1s, 0s)``), instead of mixing
    a negative hour field with positive minute/second fields.
    """
    # days/seconds hold the whole-second part; microseconds are never negative
    # in a normalised timedelta, so dropping them always rounds down.
    whole_seconds = delta.days * 86400 + delta.seconds
    sign = "-" if whole_seconds < 0 else ""

    hours, remainder = divmod(abs(whole_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f'{sign}{hours:02}:{minutes:02}:{seconds:02}'
@ -44,10 +52,15 @@ def get_vtt():
db_conn = app.db_manager.get_conn() db_conn = app.db_manager.get_conn()
segments = common.get_best_segments(app.segments_dir,
start_time,
end_time)
segments_start_time = segments[0].start
results = fetch_lines(db_conn, start_time, end_time) results = fetch_lines(db_conn, start_time, end_time)
return Response( return Response(
render_template("busubs.jinja", results=results, bustime_start=app.bustime_start, render_template("busubs.jinja", results=results, start_time=segments_start_time,
duration_extend=timedelta(seconds=0.3)), duration_extend=timedelta(seconds=0.3)),
mimetype="text/vtt" mimetype="text/vtt"
) )
@ -102,7 +115,7 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset
query = "SELECT *" + \ query = "SELECT *" + \
( (
",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \ ",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel=''<span class=\"highlight\">'', StopSel=</span>') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \
" FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s " " FROM buscribe_all_transcriptions WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s "
if ts_query is not None: if ts_query is not None:
query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \ query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \

@ -47,7 +47,9 @@ def servelet(server):
'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE') 'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start', @argh.arg('--bustime-start',
help='The start time in UTC for the event, for UTC-Bustime conversion') help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8010, bustime_start=None): @argh.arg('--base-dir',
help='Directory from which segments will be grabbed. Default is current working directory.')
def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None):
if bustime_start is None: if bustime_start is None:
logging.error("Missing --bustime-start!") logging.error("Missing --bustime-start!")
exit(1) exit(1)
@ -60,6 +62,8 @@ def main(database="", host='0.0.0.0', port=8010, bustime_start=None):
logging.error("Invalid --bustime-start!") logging.error("Invalid --bustime-start!")
exit(1) exit(1)
app.segments_dir = base_dir
app.db_manager = DBManager(dsn=database) app.db_manager = DBManager(dsn=database)
stopping = gevent.event.Event() stopping = gevent.event.Event()

@ -1,2 +1,10 @@
{{ (row.start_time - bustime_start - duration_extend)|convert_vtt_timedelta }} --> {{ (row.end_time - bustime_start + duration_extend)|convert_vtt_timedelta }} {{ (row.start_time - start_time)|convert_vtt_timedelta }} --> {{ (row.end_time - start_time + duration_extend)|convert_vtt_timedelta }}
- {{ row.transcription_line }} <c.{%- if row.verifier is not none -%}verified_cc{%- else -%}machine_cc{%- endif -%}>
{%- if row.transcription_json is none -%}
{{ row.transcription_line }}
{%- else -%}
{%- set line_start_time = row.transcription_json["result"][0]["start"] -%}
{%- for word in row.transcription_json["result"] -%}
<{{ ((row.start_time - start_time) + (word["start"] - line_start_time)|create_seconds_timedelta)|convert_vtt_timedelta }}>{{ word["word"] + " " }}
{%- endfor -%}
{%- endif -%}</c>

@ -1,5 +1,7 @@
WEBVTT WEBVTT
{% for row in results %} {% for row in results %}
{% include "busub.jinja" %} {%- if row.transcription_line is not none %}
{%- include "busub.jinja" %}
{%- endif %}
{% endfor %} {% endfor %}

@ -87,7 +87,8 @@ SELECT buscribe_transcriptions.id,
names, names,
verified_line AS transcription_line, verified_line AS transcription_line,
setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts, setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts,
setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts,
null AS transcription_json
FROM buscribe_transcriptions FROM buscribe_transcriptions
LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line
LEFT OUTER JOIN ( LEFT OUTER JOIN (
@ -108,7 +109,8 @@ SELECT id,
null AS names, null AS names,
transcription_line, transcription_line,
to_tsvector('english', transcription_line) AS transcription_line_ts, to_tsvector('english', transcription_line) AS transcription_line_ts,
null AS names_ts null AS names_ts,
transcription_json
FROM buscribe_transcriptions; FROM buscribe_transcriptions;
ROLLBACK; ROLLBACK;

@ -8,7 +8,7 @@ function pageReady() {
controls: true, controls: true,
autoplay: false, autoplay: false,
width: 900, width: 900,
height: 420, height: 900 / 16 * 9,
playbackRates: [0.5, 1, 1.25, 1.5, 2], playbackRates: [0.5, 1, 1.25, 1.5, 2],
inactivityTimeout: 0, inactivityTimeout: 0,
controlBar: { controlBar: {
@ -16,24 +16,46 @@ function pageReady() {
volumePanel: { volumePanel: {
inline: false, inline: false,
}, },
}, }
sources: [{src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}]
}); });
// this changes the background color to black
const bgColorSelector = document.querySelector('.vjs-bg-color > select');
bgColorSelector.value = "#000";
// this changes the background opacity to 0.5
const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select');
bgOpacitySelector.value = "0.5"
fetch(`//localhost:8005/professor/line/${line_id}`) fetch(`//localhost:8005/professor/line/${line_id}`)
.then(response => response.json()) .then(response => response.json())
.then(fillLineInfo); .then(fillLineInfo)
.then(initializePlayer);
} }
/**
 * Fills the page with the transcription data of the fetched line.
 *
 * Stores the response in the global `line` (read later by
 * initializePlayer), renders every recognised word into
 * #original_transcription with its confidence as the opacity, and uses the
 * full line text as the placeholder of #new_transcription.
 *
 * @param {Object} line_json - Response body; `line_data.result` is read as a
 *     list of `{word, conf}` entries and `line_data.text` as the full line.
 */
function fillLineInfo(line_json) {
    // Deliberately assigns the shared global: initializePlayer() needs the
    // line's start/end times.
    line = line_json;

    // Build the word spans as DOM nodes rather than through innerHTML so a
    // word containing "<" or "&" is shown literally instead of parsed as markup.
    const original = document.getElementById("original_transcription");
    original.textContent = "";
    line_json.line_data.result.forEach((word, index) => {
        if (index > 0) {
            original.appendChild(document.createTextNode(" "));
        }
        const span = document.createElement("span");
        span.style.opacity = word.conf;
        span.textContent = word.word;
        original.appendChild(span);
    });

    // setAttribute also works when the element has no placeholder attribute yet.
    document.getElementById("new_transcription")
        .setAttribute("placeholder", line_json.line_data.text);
}
/**
 * Points the "player" video.js instance at this line's HLS playlist and
 * attaches the matching English captions track.
 *
 * Reads the globals `line_id` and `line`. `line` is assigned by
 * fillLineInfo(), so this has to run after the line JSON has been loaded.
 */
function initializePlayer() {
    // Look the player up once and reuse it for both calls.
    const player = videojs.getPlayer("player");

    player.src([
        {src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}
    ]);

    // Percent-encode the timestamps: a raw "+" (e.g. from a timezone offset)
    // or a space would otherwise be misread when the query string is parsed.
    const startTime = encodeURIComponent(line.start_time);
    const endTime = encodeURIComponent(line.end_time);

    player.addRemoteTextTrack({
        kind: "captions",
        src: `//localhost:8010/buscribe/vtt?start_time=${startTime}&end_time=${endTime}`,
        srclang: "en",
        label: "English",
        default: true
    }, false);
}
async function submit() { async function submit() {
document.getElementById("update_indicator").innerText = "⭯" document.getElementById("update_indicator").innerText = "⭯"

@ -57,3 +57,7 @@ button {
margin-left: 1em; margin-left: 1em;
vertical-align: middle; vertical-align: middle;
} }
/* Light-green text for caption spans carrying the "verified_cc" class, which
   the VTT cue template applies to lines that have a verifier.
   NOTE(review): presumably the player renders the cue class as a <span>
   class -- confirm. */
span.verified_cc {
color: #c1ffc1;
}

Loading…
Cancel
Save