You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
wubloader/buscribe/buscribe/main.py

141 lines
5.6 KiB
Python

import logging
import os
from datetime import timedelta, datetime, timezone
from time import sleep
import argh
import common
import gevent
from common import dateutil
from common.database import DBManager
from gevent import signal
from buscribe.buscribe import get_end_of_transcript, transcribe_segments, finish_off_recognizer
from buscribe.recognizer import BuscribeRecognizer
@argh.arg('channel',
          help="Twitch channel to transcribe.")
@argh.arg('--database',
          help='Postgres conection string for database to write transcribed lines to. Either a space-separated list of '
               'key=value pairs, or a URI like: postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE .')
@argh.arg('--model',
          help='Path to STT model files. Defaults to /usr/share/buscribe/vosk-model-en-us-0.21/')
@argh.arg('--spk-model',
          help='Path to speaker recognition model files. Defaults to /usr/share/buscribe/vosk-model-spk-0.4/')
@argh.arg('--start-time',
          help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, '
               'to prime the model. The transcripts for that time will not be written to the database. If not given '
               'transcription will start after last already transcribed line.')
@argh.arg('--end-time',
          help='End of transcript. If not given continues to transcribe live.')
@argh.arg('--base-dir',
          help='Directory from which segments will be grabbed. Default is current working directory.')
def main(channel, database="", base_dir=".",
         model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
         start_time=None, end_time=None):
    """Transcribe a channel's segments and write the resulting lines to the database.

    Segments are read from ``<base_dir>/<channel>/source``. The loop repeatedly
    fetches the best available segments from the current position, feeds them to
    the recognizer, and advances to the end of what was transcribed. When no
    segments are available (live edge or a hole) it waits and retries.

    Args:
        channel: Channel name; used as the sub-directory of ``base_dir``.
        database: Connection string passed to ``DBManager`` as ``dsn``.
        base_dir: Root directory containing the per-channel segment directories.
        model: Path to the speech-to-text model files.
        spk_model: Path to the speaker recognition model files.
        start_time: Optional start time string, parsed with ``dateutil.parse``.
            Ignored when the database already holds a transcript end time.
        end_time: Optional end time string, parsed with ``dateutil.parse``.
            When ``None`` transcription continues live until SIGTERM.

    Raises:
        SystemExit: with code 1 when no start time can be determined, and with
            code 0 once transcription has reached ``end_time`` or was stopped
            after a transcription pass.
    """
    SAMPLE_RATE = 48000
    segments_dir = os.path.join(base_dir, channel, "source")

    logging.debug("Grabbing database...")
    db_manager = DBManager(dsn=database)
    db_conn = db_manager.get_conn()
    db_cursor = db_conn.cursor()
    logging.debug("Got database cursor.")

    logging.info("Figuring out starting time...")
    db_start_time = get_end_of_transcript(db_cursor)

    # Database start time takes priority
    if db_start_time is not None:
        start_time = db_start_time
    elif start_time is not None:
        start_time = dateutil.parse(start_time)
    else:
        # No start time argument AND no end of transcript (empty database)
        logging.error("Couldn't figure out start time!")
        db_conn.close()
        # SystemExit is what the site-provided exit() raises; raising it directly
        # does not depend on the site module being loaded.
        raise SystemExit(1)

    if end_time is not None:
        end_time = dateutil.parse(end_time)

    logging.info("Loading models...")
    recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
    logging.info("Models loaded.")

    logging.info('Transcribing from {}'.format(start_time))

    # Start priming the recognizer if possible
    start_of_transcription = start_time
    start_time -= timedelta(minutes=2)

    stopping = gevent.event.Event()

    def stop():
        """SIGTERM handler: flag the main loop to shut down."""
        logging.info("Shutting down")
        stopping.set()

    gevent.signal_handler(signal.SIGTERM, stop)

    # End of the most recently transcribed run of segments; None until something was transcribed.
    segments_end_time = None

    # end_time is None in live mode, in which case only a stop request ends the loop.
    while not stopping.is_set() and (end_time is None or start_time < end_time):
        # If end time isn't given, use current time (plus fudge) to get a "live" segment list
        segments = common.get_best_segments(
            segments_dir,
            start_time,
            end_time if end_time is not None else datetime.utcnow() + timedelta(minutes=2))

        # Remove initial None segment (indicating segments start time is after desired start time) if it exists
        if segments and segments[0] is None:
            segments = segments[1:]

        # If there are no segments, we:
        # - reached the live edge, or
        # - reached a still-opening hole
        # In both cases we can wait
        if not segments:
            # If we have waited for more than 1min we flush the pipes to commit the last line of the stream,
            # or commit the last line before a 1min hole.
            # Nothing to flush if nothing has been transcribed yet.
            if segments_end_time is not None and \
                    datetime.utcnow() - segments_end_time > timedelta(minutes=1):
                finish_off_recognizer(recognizer, db_cursor)

            logging.info("Waiting for new segments.")
            # Wait on the event rather than sleeping so a stop request ends the wait early.
            stopping.wait(30)
            continue  # Retry

        hole_before_segments = (segments_end_time is not None and
                                segments[0].start - segments_end_time > timedelta(seconds=2))

        # If there are new segments, but they form a hole (judged somewhat arbitrarily as 2s) relative to
        # segments_end_time, but the hole started less than 1min ago
        if hole_before_segments and datetime.utcnow() - segments_end_time < timedelta(minutes=1):
            logging.info("Waiting for segments to be backfilled.")
            stopping.wait(30)
            continue  # Retry

        # If we got new segments, but there was a hole that could not be backfilled
        if hole_before_segments:
            finish_off_recognizer(recognizer, db_cursor)

        # Recognizer is fresh or was reset
        if recognizer.segments_start_time is None:
            recognizer.segments_start_time = segments[0].start
            logging.info(f"Starting from: {segments[0].start}")

        segments_end_time = transcribe_segments(segments, SAMPLE_RATE, recognizer, start_of_transcription, db_cursor,
                                                stopping)

        if (end_time is not None and segments_end_time >= end_time) or stopping.is_set():
            # Work's done!
            finish_off_recognizer(recognizer, db_cursor)
            db_conn.close()
            raise SystemExit(0)

        start_time = segments_end_time

    # Reached when a stop was requested while idle, or when the requested range was empty from the start.
    # Only flush the recognizer if it has actually been fed something.
    if segments_end_time is not None:
        finish_off_recognizer(recognizer, db_cursor)
    db_conn.close()