You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
wubloader/buscribe/buscribe/main.py

133 lines
5.3 KiB
Python

import logging
import os
from datetime import timedelta, datetime, timezone
from time import sleep
import argh
import common
import gevent
from common import dateutil
from common.database import DBManager
from gevent import signal
from buscribe.buscribe import get_end_of_transcript, transcribe_segments, finish_off_recognizer
from buscribe.recognizer import BuscribeRecognizer
@argh.arg('channel',
          help="Twitch channel to transcribe.")
@argh.arg('--database',
          help='Postgres conection string for database to write transcribed lines to. Either a space-separated list of '
               'key=value pairs, or a URI like: postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE .')
@argh.arg('--model',
          help='Path to STT model files. Defaults to /usr/share/buscribe/vosk-model-en-us-0.22/')
@argh.arg('--spk-model',
          help='Path to speaker recognition model files. Defaults to /usr/share/buscribe/vosk-model-spk-0.4/')
@argh.arg('--start-time',
          help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, '
               'to prime the model. The transcripts for that time will not be written to the database. If not given '
               'transcription will start after last already transcribed line.')
@argh.arg('--start-time-override',
          help='Ignore database and force override the start time.')
@argh.arg('--end-time',
          help='End of transcript. If not given continues to transcribe live.')
@argh.arg('--base-dir',
          help='Directory from which segments will be grabbed. Default is current working directory.')
def main(channel, database="", base_dir=".",
         model="/usr/share/buscribe/vosk-model-en-us-0.22/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
         start_time=None, end_time=None, start_time_override=None):
    """Transcribe a channel's segments, live or up to a given end time.

    The start time is resolved in this order: --start-time-override, the end
    of the transcript already in the database, then --start-time. Segments are
    read from <base_dir>/<channel>/source, beginning two minutes before the
    resolved start time. The process exits with status 1 when no start time
    can be determined, and with status 0 once the end time is reached or
    SIGTERM is received. The individual arguments are described by the
    argh decorators on this function.
    """
    SAMPLE_RATE = 48000
    # A hole younger than this may still be filled in; an older one is treated as permanent.
    HOLE_GRACE_PERIOD = timedelta(minutes=1)
    # Seconds to wait before asking for the segment list again.
    RETRY_INTERVAL = 30

    segments_dir = os.path.join(base_dir, channel, "source")

    logging.debug("Grabbing database...")
    db_manager = DBManager(dsn=database, register_types=False)
    db_conn = db_manager.get_conn()

    # try/finally so the connection is closed on every way out of this
    # function: normal completion, SystemExit, or an unexpected exception.
    try:
        db_cursor = db_conn.cursor()
        logging.debug("Got database cursor.")

        logging.info("Figuring out starting time...")
        db_start_time = get_end_of_transcript(db_cursor)

        # Override takes priority, then the database, then the plain argument.
        if start_time_override is not None:
            start_time = dateutil.parse(start_time_override)
        elif db_start_time is not None:
            start_time = db_start_time
        elif start_time is not None:
            start_time = dateutil.parse(start_time)
        else:
            # No start time argument AND no end of transcript (empty database)
            logging.error("Couldn't figure out start time!")
            raise SystemExit(1)
        logging.info(f"Start time: {start_time}")

        if end_time is not None:
            end_time = dateutil.parse(end_time)
            logging.info(f"End time: {end_time}")

        logging.info("Loading models...")
        recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
        logging.info("Models loaded.")

        logging.info(f'Transcribing from {start_time}')

        # Start priming the recognizer if possible: read from two minutes
        # earlier, while remembering where the requested transcript begins.
        start_of_transcription = start_time
        start_time -= timedelta(minutes=2)

        stopping = gevent.event.Event()

        def stop():
            logging.info("Shutting down")
            stopping.set()

        gevent.signal_handler(signal.SIGTERM, stop)

        while end_time is None or start_time < end_time:
            # If end time isn't given, use current time (plus fudge) to get a "live" segment list
            segments = common.get_best_segments(segments_dir,
                                                start_time,
                                                end_time if end_time is not None else
                                                datetime.utcnow() + timedelta(minutes=2))

            # A leading None means there is a hole at the start of the
            # requested range: the first available segment starts after start_time.
            if segments[0] is None:
                # Sample the clock once so the "reset" and "wait" decisions
                # below can never disagree about the age of the hole.
                hole_is_old = datetime.utcnow() - start_time > HOLE_GRACE_PERIOD

                # The hole is older than a minute, therefore
                # - reset recognizer
                # - continue from existing segments
                if hole_is_old:
                    finish_off_recognizer(recognizer, db_cursor)

                # If the hole is less than a minute old, or if we don't have new segments: wait for segments
                if not hole_is_old or segments == [None]:
                    logging.info("Waiting for new or backfilled segments.")
                    # Wait on the stop event instead of a blocking sleep so
                    # the SIGTERM handler can run and end the wait early.
                    stopping.wait(RETRY_INTERVAL)
                    if stopping.is_set():
                        # Same shutdown sequence as the normal exit path below.
                        finish_off_recognizer(recognizer, db_cursor)
                        raise SystemExit(0)
                    continue  # Retry

                # Remove initial None segment (indicating segments start time is after desired start time)
                segments = segments[1:]

            # Recognizer is fresh or was reset
            if recognizer.segments_start_time is None:
                recognizer.segments_start_time = segments[0].start
                logging.info(f"Starting from: {segments[0].start}")

            segments_end_time = transcribe_segments(segments, SAMPLE_RATE, recognizer, start_of_transcription,
                                                    db_cursor, stopping)

            reached_end = end_time is not None and segments_end_time >= end_time
            if reached_end or stopping.is_set():
                # Work's done!
                finish_off_recognizer(recognizer, db_cursor)
                raise SystemExit(0)

            start_time = segments_end_time
    finally:
        db_conn.close()