From fdb3f034ccdc73ac3f0266b92de4c1d940d405e9 Mon Sep 17 00:00:00 2001 From: HeNine <> Date: Mon, 20 Sep 2021 10:37:16 +0200 Subject: [PATCH] Graceful stop for transcription --- buscribe/buscribe/buscribe.py | 6 +++++- buscribe/buscribe/main.py | 16 ++++++++++++++-- buscribe/setup.py | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/buscribe/buscribe/buscribe.py b/buscribe/buscribe/buscribe.py index a4ed41c..7c2a764 100644 --- a/buscribe/buscribe/buscribe.py +++ b/buscribe/buscribe/buscribe.py @@ -3,6 +3,7 @@ import logging import subprocess from datetime import timedelta, datetime +from gevent.event import Event from psycopg2._psycopg import cursor from buscribe.recognizer import BuscribeRecognizer @@ -13,7 +14,7 @@ class HitMissingSegment(Exception): def transcribe_segments(segments: list, sample_rate: int, recognizer: BuscribeRecognizer, start_of_transcript: datetime, - db_cursor: cursor): + db_cursor: cursor, stopping: Event): """Starts transcribing from a list of segments. Only starts committing new lines to the database after reaching start_of_transcript. @@ -55,6 +56,9 @@ def transcribe_segments(segments: list, sample_rate: int, recognizer: BuscribeRe if line_start_time > start_of_transcript: write_line(result_json, line_start_time, line_end_time, db_cursor) + if stopping.is_set(): + return segments_end_time + return segments_end_time diff --git a/buscribe/buscribe/main.py b/buscribe/buscribe/main.py index fd935a2..281f07c 100644 --- a/buscribe/buscribe/main.py +++ b/buscribe/buscribe/main.py @@ -5,8 +5,10 @@ from time import sleep import argh import common +import gevent from common import dateutil from common.database import DBManager +from gevent import signal from buscribe.buscribe import get_end_of_transcript, transcribe_segments, finish_off_recognizer from buscribe.recognizer import BuscribeRecognizer @@ -63,6 +65,15 @@ def main(database="", base_dir=".", # Start priming the recognizer if possible start_time -= timedelta(minutes=2) + stopping = gevent.event.Event() + + def stop(): + logging.info("Shutting down") + + stopping.set() + + gevent.signal_handler(signal.SIGTERM, stop) + while True: # If end time isn't given, use current time (plus fudge) to get a "live" segment list segments = common.get_best_segments(segments_dir, @@ -75,9 +86,10 @@ def main(database="", base_dir=".", if recognizer.segments_start_time is None: recognizer.segments_start_time = segments[0].start - segments_end_time = transcribe_segments(segments, SAMPLE_RATE, recognizer, start_time, db_cursor) + segments_end_time = transcribe_segments(segments, SAMPLE_RATE, recognizer, start_time, db_cursor, stopping) - if end_time is not None and segments_end_time >= end_time: + if end_time is not None and segments_end_time >= end_time \ + or stopping.is_set(): # Work's done! finish_off_recognizer(recognizer, db_cursor) db_conn.close() diff --git a/buscribe/setup.py b/buscribe/setup.py index d0de350..9b427b1 100644 --- a/buscribe/setup.py +++ b/buscribe/setup.py @@ -7,6 +7,7 @@ setup( install_requires = [ "argh", "psycopg2", + "gevent==1.5a2", "greenlet==0.4.16", "psycogreen", "wubloader-common",