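"""Buscribe transcription job (part of the wubloader project).

Reads stream segments for a Twitch channel from disk, transcribes them with a
Vosk speech-to-text model (plus a speaker-recognition model), and writes the
resulting lines to a Postgres database.
"""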
import logging
import os
from datetime import timedelta, datetime, timezone
from time import sleep

import argh
import common
import gevent
from common import dateutil
from common.database import DBManager
from gevent import signal

from buscribe.buscribe import get_end_of_transcript, transcribe_segments, finish_off_recognizer
from buscribe.recognizer import BuscribeRecognizer
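
# Example invocation (illustrative only: the channel, paths and connection
# string are made up, and launching via `python -m buscribe` is an assumption
# about how the module is packaged):
#
#   python -m buscribe mychannel \
#       --base-dir /srv/wubloader \
#       --database "postgresql://buscribe:password@localhost/wubloader" \
#       --start-time 2021-11-13T00:00:00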


@argh.arg('channel',
          help="Twitch channel to transcribe.")
@argh.arg('--database',
          help='Postgres connection string for database to write transcribed lines to. Either a space-separated list '
               'of key=value pairs, or a URI like: postgresql://USER:PASSWORD@HOST/DBNAME?KEY=VALUE .')
@argh.arg('--model',
          help='Path to STT model files. Defaults to /usr/share/buscribe/vosk-model-en-us-0.21/')
@argh.arg('--spk-model',
          help='Path to speaker recognition model files. Defaults to /usr/share/buscribe/vosk-model-spk-0.4/')
@argh.arg('--start-time',
          help='Start time of the transcript. Buscribe will try to start reading 2 min before this time, if '
               'available, to prime the model. The transcripts for that time will not be written to the database. '
               'If not given, transcription will start after the last already-transcribed line.')
@argh.arg('--end-time',
          help='End of transcript. If not given, continues to transcribe live.')
@argh.arg('--base-dir',
          help='Directory from which segments will be grabbed. Default is the current working directory.')
def main(channel, database="", base_dir=".",
         model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
         start_time=None, end_time=None):
    SAMPLE_RATE = 48000

    segments_dir = os.path.join(base_dir, channel, "source")

    logging.debug("Grabbing database...")
    db_manager = DBManager(dsn=database)
    db_conn = db_manager.get_conn()
    db_cursor = db_conn.cursor()
    logging.debug("Got database cursor.")

    logging.info("Figuring out starting time...")
    db_start_time = get_end_of_transcript(db_cursor)

    # Database start time takes priority
    if db_start_time is not None:
        start_time = db_start_time
    elif start_time is not None:
        start_time = dateutil.parse(start_time)
    else:
        # No start time argument AND no end of transcript (empty database)
        logging.error("Couldn't figure out start time!")
        db_conn.close()
        exit(1)

    if end_time is not None:
        end_time = dateutil.parse(end_time)

    logging.info("Loading models...")
    recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
    logging.info("Models loaded.")

    logging.info('Transcribing from {}'.format(start_time))

    # Start priming the recognizer if possible
    start_of_transcription = start_time
    start_time -= timedelta(minutes=2)

    stopping = gevent.event.Event()

    def stop():
        logging.info("Shutting down")
        stopping.set()

    gevent.signal_handler(signal.SIGTERM, stop)

    segments_end_time = None
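
    # Main loop: fetch the best available segments for the current window
    # (get_best_segments marks holes with None), wait and retry at the live
    # edge or while a gap may still be backfilled, flush the recognizer across
    # holes that could not be filled, then transcribe and advance start_time.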
    while end_time is None or start_time < end_time:
        # If end time isn't given, use current time (plus fudge) to get a "live" segment list
        segments = common.get_best_segments(segments_dir,
                                            start_time,
                                            end_time if end_time is not None else
                                            datetime.utcnow() + timedelta(minutes=2))

        # Remove initial None segment (indicating segments start time is after desired start time) if it exists
        if segments[0] is None:
            segments = segments[1:]

        # If there are no segments, we either:
        # - reached the live edge, or
        # - reached a still-opening hole
        # In both cases we can wait.
        if not segments:
            # If we have waited for more than 1min, flush the pipes to commit the last line of the stream,
            # or to commit the last line before a 1min hole.
            if segments_end_time is not None and \
                    datetime.utcnow() - segments_end_time > timedelta(minutes=1):
                finish_off_recognizer(recognizer, db_cursor)

            logging.info("Waiting for new segments.")
            sleep(30)

            continue  # Retry
        # If the new segments open a hole (judged somewhat arbitrarily as >2s) relative to segments_end_time,
        # but the hole started less than 1min ago, give the backfiller a chance to fill it.
        elif segments_end_time is not None and \
                segments[0].start - segments_end_time > timedelta(seconds=2) and \
                datetime.utcnow() - segments_end_time < timedelta(minutes=1):

            logging.info("Waiting for segments to be backfilled.")
            sleep(30)

            continue  # Retry

        # If we got new segments, but there was a hole that could not be backfilled
        if segments_end_time is not None and \
                segments[0].start - segments_end_time > timedelta(seconds=2):
            finish_off_recognizer(recognizer, db_cursor)

        # Recognizer is fresh or was reset
        if recognizer.segments_start_time is None:
            recognizer.segments_start_time = segments[0].start
            logging.info(f"Starting from: {segments[0].start}")

        segments_end_time = transcribe_segments(segments, SAMPLE_RATE, recognizer, start_of_transcription, db_cursor,
                                                stopping)

        if (end_time is not None and segments_end_time >= end_time) or \
                stopping.is_set():
            # Work's done!
            finish_off_recognizer(recognizer, db_cursor)
            db_conn.close()
            exit(0)

        start_time = segments_end_time
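

# Entry-point sketch: how main() is actually dispatched in the repository is an
# assumption here; with argh, a minimal command-line wrapper would be:
if __name__ == '__main__':
    argh.dispatch_command(main)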