From aa7871b5f01e150649857fb00a7c0f414e86e396 Mon Sep 17 00:00:00 2001 From: HeNine <> Date: Sat, 23 Oct 2021 11:29:44 +0200 Subject: [PATCH] Rough cut compatible captions --- buscribe-api/buscribeapi/buscribeapi.py | 17 ++++++++++++-- buscribe-api/buscribeapi/main.py | 6 ++++- buscribe-api/templates/busub.jinja | 12 ++++++++-- buscribe-api/templates/busubs.jinja | 4 +++- buscribe_data.sql | 6 +++-- professor/script.js | 30 +++++++++++++++++++++---- professor/style.less | 6 ++++- 7 files changed, 68 insertions(+), 13 deletions(-) diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py index 74d7f7b..e7efc4f 100644 --- a/buscribe-api/buscribeapi/buscribeapi.py +++ b/buscribe-api/buscribeapi/buscribeapi.py @@ -2,6 +2,7 @@ import json from datetime import timedelta import flask as flask +import common from common import dateutil, database from dateutil.parser import ParserError from flask import request, jsonify, Response, render_template @@ -11,9 +12,16 @@ app = flask.Flask('buscribe') @app.template_filter() def convert_vtt_timedelta(delta: timedelta): + """Converts a timedelta to a VTT compatible format.""" return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}.{delta.microseconds // 1000:03}' +@app.template_filter() +def create_seconds_timedelta(seconds): + """Converts a float of seconds to a timedelta.""" + return timedelta(seconds=seconds) + + def round_bus_time(delta: timedelta): """Round bus time down to the second.""" return f'{delta.days * 24 + delta.seconds // 3600:02}:{(delta.seconds % 3600) // 60:02}:{delta.seconds % 60:02}' @@ -44,10 +52,15 @@ def get_vtt(): db_conn = app.db_manager.get_conn() + segments = common.get_best_segments(app.segments_dir, + start_time, + end_time) + segments_start_time = segments[0].start + results = fetch_lines(db_conn, start_time, end_time) return Response( - render_template("busubs.jinja", results=results, 
bustime_start=app.bustime_start, + render_template("busubs.jinja", results=results, start_time=segments_start_time, duration_extend=timedelta(seconds=0.3)), mimetype="text/vtt" ) @@ -102,7 +115,7 @@ def fetch_lines(db_conn, start_time, end_time, ts_query=None, limit=None, offset query = "SELECT *" + \ ( ",ts_headline(transcription_line, convert_query(%(text_query)s), 'StartSel='''', StopSel=') AS highlighted_text" if ts_query is not None else ",transcription_line AS highlighted_text") + \ - " FROM buscribe_all_transcriptions WHERE start_time > %(start_time)s AND end_time < %(end_time)s " + " FROM buscribe_all_transcriptions WHERE start_time >= %(start_time)s AND end_time <= %(end_time)s " if ts_query is not None: query += "AND (coalesce(transcription_line_ts, ''::tsvector) || coalesce(names_ts, ''::tsvector)) @@ " \ diff --git a/buscribe-api/buscribeapi/main.py b/buscribe-api/buscribeapi/main.py index 62ed766..87568c2 100644 --- a/buscribe-api/buscribeapi/main.py +++ b/buscribe-api/buscribeapi/main.py @@ -47,7 +47,9 @@ def servelet(server): 'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE') @argh.arg('--bustime-start', help='The start time in UTC for the event, for UTC-Bustime conversion') -def main(database="", host='0.0.0.0', port=8010, bustime_start=None): +@argh.arg('--base-dir', + help='Directory from which segments will be grabbed. 
Default is current working directory.') +def main(database="", host='0.0.0.0', port=8010, bustime_start=None, base_dir=None): if bustime_start is None: logging.error("Missing --bustime-start!") exit(1) @@ -60,6 +62,8 @@ def main(database="", host='0.0.0.0', port=8010, bustime_start=None): logging.error("Invalid --bustime-start!") exit(1) + app.segments_dir = base_dir + app.db_manager = DBManager(dsn=database) stopping = gevent.event.Event() diff --git a/buscribe-api/templates/busub.jinja b/buscribe-api/templates/busub.jinja index 51b41be..e585d8b 100644 --- a/buscribe-api/templates/busub.jinja +++ b/buscribe-api/templates/busub.jinja @@ -1,2 +1,10 @@ -{{ (row.start_time - bustime_start - duration_extend)|convert_vtt_timedelta }} --> {{ (row.end_time - bustime_start + duration_extend)|convert_vtt_timedelta }} -- {{ row.transcription_line }} +{{ (row.start_time - start_time)|convert_vtt_timedelta }} --> {{ (row.end_time - start_time + duration_extend)|convert_vtt_timedelta }} + + {%- if row.transcription_json is none -%} + {{ row.transcription_line }} + {%- else -%} + {%- set line_start_time = row.transcription_json["result"][0]["start"] -%} + {%- for word in row.transcription_json["result"] -%} + <{{ ((row.start_time - start_time) + (word["start"] - line_start_time)|create_seconds_timedelta)|convert_vtt_timedelta }}>{{ word["word"] + " " }} + {%- endfor -%} + {%- endif -%} \ No newline at end of file diff --git a/buscribe-api/templates/busubs.jinja b/buscribe-api/templates/busubs.jinja index 258e009..0d4a47a 100644 --- a/buscribe-api/templates/busubs.jinja +++ b/buscribe-api/templates/busubs.jinja @@ -1,5 +1,7 @@ WEBVTT {% for row in results %} -{% include "busub.jinja" %} + {%- if row.transcription_line is not none %} + {%- include "busub.jinja" %} + {%- endif %} {% endfor %} \ No newline at end of file diff --git a/buscribe_data.sql b/buscribe_data.sql index b06607f..b2a4ff4 100644 --- a/buscribe_data.sql +++ b/buscribe_data.sql @@ -87,7 +87,8 @@ SELECT 
buscribe_transcriptions.id, names, verified_line AS transcription_line, setweight(to_tsvector('english', verified_line), 'C') AS transcription_line_ts, - setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts + setweight(to_tsvector(array_to_string(names, ' ')), 'C') AS names_ts, + null AS transcription_json FROM buscribe_transcriptions LEFT OUTER JOIN buscribe_verified_lines ON buscribe_transcriptions.id = buscribe_verified_lines.line LEFT OUTER JOIN ( @@ -108,7 +109,8 @@ SELECT id, null AS names, transcription_line, to_tsvector('english', transcription_line) AS transcription_line_ts, - null AS names_ts + null AS names_ts, + transcription_json FROM buscribe_transcriptions; ROLLBACK; diff --git a/professor/script.js b/professor/script.js index 98fce30..3a42ba8 100644 --- a/professor/script.js +++ b/professor/script.js @@ -8,7 +8,7 @@ function pageReady() { controls: true, autoplay: false, width: 900, - height: 420, + height: 900 / 16 * 9, playbackRates: [0.5, 1, 1.25, 1.5, 2], inactivityTimeout: 0, controlBar: { @@ -16,24 +16,46 @@ function pageReady() { volumePanel: { inline: false, }, - }, - sources: [{src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`}] + } }); + // this changes the background color to black + const bgColorSelector = document.querySelector('.vjs-bg-color > select'); + bgColorSelector.value = "#000"; + + // this changes the background opacity to 0.5 + const bgOpacitySelector = document.querySelector('.vjs-bg-opacity > select'); + bgOpacitySelector.value = "0.5" + fetch(`//localhost:8005/professor/line/${line_id}`) .then(response => response.json()) - .then(fillLineInfo); + .then(fillLineInfo) + .then(initializePlayer); } function fillLineInfo(line_json) { // document.getElementById("original_transcription").innerText = line_json.line_data.text; + line = line_json document.getElementById("original_transcription").innerHTML = line_json.line_data.result .map(word => `${word.word}`).join(" "); 
document.getElementById("new_transcription") .attributes.getNamedItem("placeholder").value = line_json.line_data.text; } +function initializePlayer() { + videojs.getPlayer("player").src([ + {src: `//localhost:8005/professor/line/${line_id}/playlist.m3u8`} + ]); + videojs.getPlayer("player").addRemoteTextTrack({ + kind: "captions", + src: `//localhost:8010/buscribe/vtt?start_time=${line.start_time}&end_time=${line.end_time}`, + srclang: "en", + label: "English", + default: true + }, false); +} + async function submit() { document.getElementById("update_indicator").innerText = "⭯" diff --git a/professor/style.less b/professor/style.less index b6a5093..def9a0d 100644 --- a/professor/style.less +++ b/professor/style.less @@ -56,4 +56,8 @@ button { height: 1.3em; margin-left: 1em; vertical-align: middle; -} \ No newline at end of file +} + +span.verified_cc { + color: #c1ffc1; +}