You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
wubloader/buscribe-api/buscribeapi/buscribeapi.py

103 lines
3.6 KiB
Python

import json
from datetime import timedelta
import flask as flask
from common import dateutil, database
from dateutil.parser import ParserError
from flask import request, jsonify, Response, render_template
# Flask application object for the buscribe API; the template filter and the
# routes defined below are registered on it.
# NOTE(review): the request handlers read app.db_manager and app.bustime_start,
# which are not assigned in this part of the file -- presumably attached at
# startup; confirm.
app = flask.Flask('buscribe')
@app.template_filter()
def convert_vtt_timedelta(delta: timedelta):
    """Render *delta* as an ``HH:MM:SS.mmm`` timestamp string for templates.

    Whole days are folded into the hour field (hours are not wrapped at 24),
    and anything below one millisecond is truncated. Negative deltas get no
    special treatment: the fields are taken as-is from the timedelta's
    normalised ``days`` / ``seconds`` / ``microseconds`` attributes.
    """
    total_minutes_in_day, secs = divmod(delta.seconds, 60)
    hours_in_day, mins = divmod(total_minutes_in_day, 60)
    hours = delta.days * 24 + hours_in_day
    millis = delta.microseconds // 1000
    return f'{hours:02}:{mins:02}:{secs:02}.{millis:03}'
@app.route('/buscribe/vtt')
def get_vtt():
    """Return a WebVTT subtitle file for the period between start_time and end_time.

    Both ``start_time`` and ``end_time`` are required query-string parameters.
    If either one is absent the response is ``400 "Missing ... time!"``; if
    either one cannot be parsed the response is ``400 "Invalid ... time!"``.

    Times are relative to --bustime-start.

    TODO: Figure out proper offsets.
    """
    start_time_string = request.args.get('start_time')
    # An absent parameter comes back as None. Reject it explicitly rather than
    # relying on the parser to raise one particular exception type for None
    # (an uncaught exception there would surface as a 500 instead of a 400).
    if start_time_string is None:
        return "Missing start time!", 400
    try:
        start_time = dateutil.parse(start_time_string)
    except (ParserError, ValueError):
        # The value was supplied but is not a usable timestamp.
        return "Invalid start time!", 400

    end_time_string = request.args.get('end_time')
    if end_time_string is None:
        return "Missing end time!", 400
    try:
        end_time = dateutil.parse(end_time_string)
    except (ParserError, ValueError):
        return "Invalid end time!", 400

    # NOTE(review): the connection is not explicitly released in this
    # handler -- confirm whether the db manager expects it to be handed back.
    db_conn = app.db_manager.get_conn()
    results = fetch_lines(db_conn, start_time, end_time)

    return Response(
        render_template("busubs.jinja", results=results, bustime_start=app.bustime_start,
                        duration_extend=timedelta(seconds=0.3)),
        mimetype="text/vtt"
    )
@app.route('/buscribe/json')
def get_json():
    """Search the line database for *query* and return the matches as JSON.

    ``start_time`` and ``end_time`` are optional query-string bounds; an
    omitted bound is passed on as ``None``. A bound that is present but
    cannot be parsed yields a 400 response.

    Search is done using PostgreSQL websearch_to_tsquery()
    (https://www.postgresql.org/docs/13/functions-textsearch.html)

    Each element of the returned JSON array has the keys ``start_time`` and
    ``end_time`` (ISO 8601 strings) and ``text``.
    """
    start_time = None
    raw_start = request.args.get('start_time', default=None)
    if raw_start is not None:
        try:
            start_time = dateutil.parse(raw_start)
        except ParserError:
            return "Invalid start time!", 400

    end_time = None
    raw_end = request.args.get('end_time', default=None)
    if raw_end is not None:
        try:
            end_time = dateutil.parse(raw_end)
        except ParserError:
            return "Invalid end time!", 400

    # The search text is handed to fetch_lines untouched; the original author
    # believed websearch_to_tsquery() sanitizes its own input.
    query = request.args.get('query', default=None)

    db_conn = app.db_manager.get_conn()
    matches = fetch_lines(db_conn, start_time, end_time, query)

    payload = []
    for row in matches:
        payload.append({
            "start_time": row.start_time.isoformat(),
            "end_time": row.end_time.isoformat(),
            "text": row.transcription_line,
        })
    return jsonify(payload)
def fetch_lines(db_conn, start_time, end_time, query=None):
    """Fetch transcription lines lying strictly inside a time window.

    Selects rows of ``buscribe_transcriptions`` whose ``start_time`` is after
    *start_time* and whose ``end_time`` is before *end_time*. A bound given as
    ``None`` is replaced by ``'-infinity'`` / ``'infinity'`` respectively, so
    that side of the window is open.

    When *query* is not ``None`` the rows are additionally restricted to those
    whose ``transcription_line`` matches it via ``websearch_to_tsquery()``.

    Rows are returned ordered by ``start_time``; without an explicit ORDER BY
    the database is free to return them in any order, which is not a usable
    order for subtitle cues or result listings.

    All values are passed to ``database.query`` as positional query
    parameters (never interpolated into the SQL text); its return value is
    returned unchanged.
    """
    conditions = ["start_time > %s", "end_time < %s"]
    params = [
        start_time if start_time is not None else '-infinity',
        end_time if end_time is not None else 'infinity',
    ]

    if query is not None:
        conditions.append("to_tsvector(transcription_line) @@ websearch_to_tsquery(%s)")
        params.append(query)

    sql = ("SELECT * FROM buscribe_transcriptions WHERE "
           + " AND ".join(conditions)
           + " ORDER BY start_time;")
    return database.query(db_conn, sql, *params)