pull/414/head
HeNine 3 years ago
parent 787ab3895b
commit ec1bbad7de

@ -0,0 +1,12 @@
import logging
import os
import argh
from buscribeapi.main import main
# Layout applied to every log record: timestamp, level padded to 8 columns,
# logger name, originating module and line number, then the message.
LOG_FORMAT = "[%(asctime)s] %(levelname)8s %(name)s(%(module)s:%(lineno)d): %(message)s"
# Verbosity is read from the WUBLOADER_LOG_LEVEL environment variable and
# upper-cased before use; INFO is used when the variable is unset.
level = os.environ.get('WUBLOADER_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(level=level, format=LOG_FORMAT)
# Let argh build the command-line interface from main's signature and invoke it.
argh.dispatch_command(main)

@ -1,14 +1,26 @@
import json
from datetime import timedelta
import flask as flask import flask as flask
from common import dateutil from common import dateutil, database
from dateutil.parser import ParserError from dateutil.parser import ParserError
from flask import request from flask import request, jsonify, Response, render_template
app = flask.Flask('buscribe') app = flask.Flask('buscribe')
@app.template_filter()
def convert_vtt_timedelta(delta: timedelta):
    """Format *delta* as a WebVTT timestamp of the form ``HH:MM:SS.mmm``.

    Hours are not wrapped at 24, so offsets longer than a day render as
    e.g. ``27:15:00.000``. Sub-millisecond precision is truncated.

    WebVTT has no notation for negative timestamps, so a negative *delta*
    (for example a line that starts before the reference time once padding
    is subtracted) is clamped to ``00:00:00.000``.
    """
    # Work from a single integer count of milliseconds. Reading the
    # days/seconds/microseconds fields separately gives nonsense for negative
    # values, because timedelta normalises them to days=-1 plus positive parts.
    total_ms = max(delta // timedelta(milliseconds=1), 0)

    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)

    return f'{hours:02}:{minutes:02}:{seconds:02}.{millis:03}'
@app.route('/buscribe/vtt') @app.route('/buscribe/vtt')
def get_vtt(): def get_vtt():
"""Returns WebVTT subtitle file for the period between start_time and end_time.""" """Returns WebVTT subtitle file for the period between start_time and end_time.
Times are relative to --bustime-start.
TODO: Figure out proper offsets."""
try: try:
start_time_string = request.args.get('start_time') start_time_string = request.args.get('start_time')
start_time = dateutil.parse(start_time_string) start_time = dateutil.parse(start_time_string)
@ -25,23 +37,66 @@ def get_vtt():
except ValueError: except ValueError:
return "Missing end time!", 400 return "Missing end time!", 400
db_conn = app.db_manager.get_conn()
results = fetch_lines(db_conn, start_time, end_time)
return Response(
render_template("busubs.jinja", results=results, bustime_start=app.bustime_start,
duration_extend=timedelta(seconds=0.3)),
mimetype="text/vtt"
)
@app.route('/buscribe/json')
def get_json():
    """Searches the line database for *query*, with optional start_time and end_time boundaries.

    Search is done using PostgreSQL websearch_to_tsquery()
    (https://www.postgresql.org/docs/13/functions-textsearch.html)

    Request arguments (all optional):
        start_time: only lines after this time are returned.
        end_time: only lines before this time are returned.
        query: full-text search expression; when absent, every line in the
            time window is returned.

    Returns a JSON list of objects with "start_time" and "end_time"
    (ISO 8601 strings) and "text". Responds with HTTP 400 when a supplied
    time cannot be parsed.
    """

    # A missing boundary is passed on as None, leaving that side of the
    # window unbounded.
    start_time_string = request.args.get('start_time', default=None)

    if start_time_string is not None:
        try:
            start_time = dateutil.parse(start_time_string)
        except ParserError:
            return "Invalid start time!", 400
    else:
        start_time = None

    end_time_string = request.args.get('end_time', default=None)

    if end_time_string is not None:
        try:
            end_time = dateutil.parse(end_time_string)
        except ParserError:
            return "Invalid end time!", 400
    else:
        end_time = None

    # I think websearch_to_tsquery() sanitizes its own input.
    # The search text comes from the 'query' argument (not 'end_time').
    query = request.args.get('query', default=None)

    db_conn = app.db_manager.get_conn()

    results = fetch_lines(db_conn, start_time, end_time, query)

    return jsonify([{"start_time": row.start_time.isoformat(),
                     "end_time": row.end_time.isoformat(),
                     "text": row.transcription_line} for row in results])
def fetch_lines(db_conn, start_time, end_time, query=None):
    """Fetch transcription lines lying strictly inside a time window.

    Args:
        db_conn: database connection handed to ``database.query``.
        start_time: lower bound (exclusive) on a line's start_time, or None
            for no lower bound.
        end_time: upper bound (exclusive) on a line's end_time, or None for
            no upper bound.
        query: optional search text for PostgreSQL ``websearch_to_tsquery``;
            when None, no text filtering is applied.

    Returns:
        Whatever ``database.query`` returns for the matching rows, ordered
        by start_time.
    """
    conditions = ["start_time > %s", "end_time < %s"]
    # PostgreSQL's special timestamp values stand in for a missing boundary.
    params = [
        start_time if start_time is not None else '-infinity',
        end_time if end_time is not None else 'infinity',
    ]

    if query is not None:
        # Name the 'english' configuration explicitly: the GIN index is built
        # on to_tsvector('english', transcription_line), and PostgreSQL only
        # uses an expression index when the query spells the same two-argument
        # form. The tsquery uses the same configuration so both sides stem alike.
        conditions.append(
            "to_tsvector('english', transcription_line) @@ websearch_to_tsquery('english', %s)"
        )
        params.append(query)

    # Order chronologically so callers (e.g. subtitle rendering) get lines in
    # playback order instead of an unspecified order.
    sql = ("SELECT * FROM buscribe_transcriptions WHERE "
           + " AND ".join(conditions)
           + " ORDER BY start_time;")

    return database.query(db_conn, sql, *params)

@ -0,0 +1,60 @@
import logging
import os
import argh
from common import dateutil
from common.database import DBManager
from dateutil.parser import ParserError
from gevent.pywsgi import WSGIServer
from buscribeapi.buscribeapi import app
def cors(app):
    """WSGI middleware that sets CORS headers.

    Wraps the WSGI callable *app* and returns a new WSGI callable which
    appends a fixed set of permissive, read-only CORS headers to every
    response before passing it on to the server's ``start_response``.
    """
    HEADERS = [
        ("Access-Control-Allow-Credentials", "false"),
        ("Access-Control-Allow-Headers", "*"),
        ("Access-Control-Allow-Methods", "GET,HEAD"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Max-Age", "86400"),
    ]

    def handle(environ, start_response):
        def _start_response(status, headers, exc_info=None):
            # Build a fresh list instead of extending `headers` in place: the
            # list belongs to the wrapped application, and if it reuses the
            # same list across requests an in-place += would pile up another
            # copy of the CORS headers on every call.
            return start_response(status, list(headers) + HEADERS, exc_info)

        return app(environ, _start_response)

    return handle
@argh.arg('--host',
          help='Address or socket server will listen to. Default is 0.0.0.0 (everything on the local machine).')
@argh.arg('--port',
          help='Port server will listen on. Default is 8005.')
@argh.arg('--database',
          help='Postgres connection string, which is either a space-separated list of key=value pairs, or a URI like: '
               'postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE')
@argh.arg('--bustime-start',
          help='The start time in UTC for the event, for UTC-Bustime conversion')
def main(database="", host='0.0.0.0', port=8005, bustime_start=None):
    """Configure the buscribe API application and serve it until shut down.

    Args:
        database: Postgres connection string passed to DBManager as its dsn.
        host: address the WSGI server listens on.
        port: port the WSGI server listens on.
        bustime_start: event start time as a string; required. It is parsed
            and stored on the app as ``app.bustime_start``.

    Exits with status 1 when --bustime-start is missing or cannot be parsed.
    """
    # Validate arguments first so a bad invocation fails before any server or
    # database objects are created.
    if bustime_start is None:
        logging.error("Missing --bustime-start!")
        # SystemExit is raised directly: the bare exit() helper is injected by
        # the site module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    try:
        app.bustime_start = dateutil.parse(bustime_start)
    except ParserError:
        logging.error("Invalid --bustime-start!")
        raise SystemExit(1)

    app.db_manager = DBManager(dsn=database)

    # Every response goes through the CORS middleware.
    server = WSGIServer((host, port), cors(app))

    logging.info('Starting up')
    server.serve_forever()
    logging.info("Gracefully shut down")

@ -7,6 +7,7 @@ setup(
install_requires = [ install_requires = [
"argh", "argh",
"psycopg2", "psycopg2",
"gevent",
"greenlet==0.4.16", "greenlet==0.4.16",
"psycogreen", "psycogreen",
"wubloader-common", "wubloader-common",

@ -0,0 +1,2 @@
{{ (row.start_time - bustime_start - duration_extend)|convert_vtt_timedelta }} --> {{ (row.end_time - bustime_start + duration_extend)|convert_vtt_timedelta }}
- {{ row.transcription_line }}

@ -0,0 +1,5 @@
WEBVTT
{% for row in results %}
{% include "busub.jinja" %}
{% endfor %}

@ -12,3 +12,7 @@ CREATE TABLE buscribe_transcriptions
CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
GIN (to_tsvector('english', transcription_line)); GIN (to_tsvector('english', transcription_line));
-- This might not actually be needed. Check once there is more data.
CREATE INDEX buscribe_start_time_idx ON buscribe_transcriptions (start_time);
CREATE INDEX buscribe_end_time_idx ON buscribe_transcriptions (end_time);
Loading…
Cancel
Save