diff --git a/buscribe-api/buscribeapi/__init__.py b/buscribe-api/buscribeapi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/buscribe-api/buscribeapi/__main__.py b/buscribe-api/buscribeapi/__main__.py
new file mode 100644
index 0000000..e69de29
diff --git a/buscribe-api/buscribeapi/buscribeapi.py b/buscribe-api/buscribeapi/buscribeapi.py
new file mode 100644
index 0000000..202285e
--- /dev/null
+++ b/buscribe-api/buscribeapi/buscribeapi.py
@@ -0,0 +1,71 @@
+import flask as flask
+from common import dateutil
+from dateutil.parser import ParserError
+from flask import request
+
+app = flask.Flask('buscribe')
+
+
+def _parse_time_arg(name, label, required):
+    """Read the query parameter *name* and parse it as a timestamp.
+
+    *label* is the word used in error messages ("start" or "end").
+
+    Returns a (timestamp, error) pair. On success error is None. When an
+    optional parameter is absent both are None. Otherwise timestamp is None
+    and error is a (message, 400) tuple that the view can return directly.
+    """
+    raw = request.args.get(name)
+    if raw is None:
+        # args.get() yields None for an absent parameter; report that (or
+        # accept it) here instead of handing None to the parser.
+        if required:
+            return None, ("Missing {} time!".format(label), 400)
+        return None, None
+    try:
+        return dateutil.parse(raw), None
+    except (ParserError, ValueError):
+        return None, ("Invalid {} time!".format(label), 400)
+
+
+@app.route('/buscribe/vtt')
+def get_vtt():
+    """Returns WebVTT subtitle file for the period between start_time and end_time.
+
+    Both start_time and end_time are required; a missing or unparseable value
+    yields a 400 response.
+    """
+    start_time, error = _parse_time_arg('start_time', 'start', required=True)
+    if error is not None:
+        return error
+
+    end_time, error = _parse_time_arg('end_time', 'end', required=True)
+    if error is not None:
+        return error
+
+    # TODO: build the WebVTT document for [start_time, end_time].
+    return "Not implemented yet!", 501
+
+
+@app.route('/buscribe/json')
+def get_json():
+    """Searches the line database for *query*, with optional start_time and end_time boundaries.
+
+    Search is done using PostgreSQL websearch_to_tsquery() (https://www.postgresql.org/docs/13/functions-textsearch.html)
+
+    An unparseable start_time or end_time yields a 400 response; an absent one
+    leaves that boundary as None.
+    """
+    start_time, error = _parse_time_arg('start_time', 'start', required=False)
+    if error is not None:
+        return error
+
+    end_time, error = _parse_time_arg('end_time', 'end', required=False)
+    if error is not None:
+        return error
+
+    # I think websearch_to_tsquery() sanitizes its own input.
+    query = request.args.get('query', default=None)
+
+    # TODO: run the search and serialize the matching lines.
+    return "Not implemented yet!", 501
diff --git a/buscribe-api/buscribeapi/main.py b/buscribe-api/buscribeapi/main.py
new file mode 100644
index 0000000..e69de29
diff --git a/buscribe-api/setup.py b/buscribe-api/setup.py
new file mode 100644
index 0000000..33ab450
--- /dev/null
+++ b/buscribe-api/setup.py
@@ -0,0 +1,16 @@
+from setuptools import setup, find_packages
+
+setup(
+    name = "wubloader-buscribe-api",
+    version = "0.0.0",
+    packages = find_packages(),
+    install_requires = [
+        "argh",
+        "psycopg2",
+        "greenlet==0.4.16",
+        "psycogreen",
+        "wubloader-common",
+        "python-dateutil",
+        "flask"
+    ],
+)
diff --git a/buscribe_data.sql b/buscribe_data.sql
index 8459552..f3266ed 100644
--- a/buscribe_data.sql
+++ b/buscribe_data.sql
@@ -8,4 +8,7 @@ CREATE TABLE buscribe_transcriptions
     transcription_line text NOT NULL,
     line_speaker float[128],
     transcription_json jsonb NOT NULL
-);
\ No newline at end of file
+);
+
+CREATE INDEX buscribe_transcriptions_idx ON buscribe_transcriptions USING
+    GIN (to_tsvector('english', transcription_line));
\ No newline at end of file