current stat dump

trunk
Matija Rezar 3 months ago
parent 0355c59ee8
commit fbb6c4dca0

@ -0,0 +1 @@
models/

@ -2,7 +2,7 @@
VERSION=0.0.0 VERSION=0.0.0
bash fetch_models.sh #bash fetch_models.sh
docker build -f buscribe/Dockerfile -t buscribe:$VERSION . docker build -f buscribe/Dockerfile -t buscribe:$VERSION .
docker build -f buscribe-api/Dockerfile -t buscribe-api:$VERSION . docker build -f buscribe-api/Dockerfile -t buscribe-api:$VERSION .

@ -1,9 +1,9 @@
FROM debian:latest FROM debian:11
RUN apt update &&\ RUN apt update &&\
apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg
COPY ../common /tmp/common COPY common /tmp/common
RUN pip install /tmp/common && rm -r /tmp/common RUN pip install /tmp/common && rm -r /tmp/common
COPY buscribe /tmp/buscribe COPY buscribe /tmp/buscribe

@ -27,13 +27,15 @@ from buscribe.recognizer import BuscribeRecognizer
help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, ' help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, '
'to prime the model. The transcripts for that time will not be written to the database. If not given ' 'to prime the model. The transcripts for that time will not be written to the database. If not given '
'transcription will start after last already transcribed line.') 'transcription will start after last already transcribed line.')
@argh.arg('--start-time-override',
help='Ignore database and force override the start time.')
@argh.arg('--end-time', @argh.arg('--end-time',
help='End of transcript. If not given continues to transcribe live.') help='End of transcript. If not given continues to transcribe live.')
@argh.arg('--base-dir', @argh.arg('--base-dir',
help='Directory from which segments will be grabbed. Default is current working directory.') help='Directory from which segments will be grabbed. Default is current working directory.')
def main(channel, database="", base_dir=".", def main(channel, database="", base_dir=".",
model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/", model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/",
start_time=None, end_time=None): start_time=None, end_time=None, start_time_override=None):
SAMPLE_RATE = 48000 SAMPLE_RATE = 48000
segments_dir = os.path.join(base_dir, channel, "source") segments_dir = os.path.join(base_dir, channel, "source")
@ -46,8 +48,11 @@ def main(channel, database="", base_dir=".",
logging.info("Figuring out starting time...") logging.info("Figuring out starting time...")
db_start_time = get_end_of_transcript(db_cursor) db_start_time = get_end_of_transcript(db_cursor)
# Database start time takes priority # ~~Database start time takes priority~~
if db_start_time is not None: # Override takes priority
if start_time_override is not None:
start_time = dateutil.parse(start_time_override)
elif db_start_time is not None:
start_time = db_start_time start_time = db_start_time
elif start_time is not None: elif start_time is not None:
start_time = dateutil.parse(start_time) start_time = dateutil.parse(start_time)
@ -56,10 +61,13 @@ def main(channel, database="", base_dir=".",
logging.error("Couldn't figure out start time!") logging.error("Couldn't figure out start time!")
db_conn.close() db_conn.close()
exit(1) exit(1)
logging.info("Start time: {}".format(start_time))
if end_time is not None: if end_time is not None:
end_time = dateutil.parse(end_time) end_time = dateutil.parse(end_time)
logging.info("End time: {}".format(end_time))
logging.info("Loading models...") logging.info("Loading models...")
recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model) recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model)
logging.info("Models loaded.") logging.info("Models loaded.")

@ -7,8 +7,10 @@ setup(
install_requires = [ install_requires = [
"argh", "argh",
"psycopg2", "psycopg2",
"gevent==1.5a2", #"gevent==1.5a2",
"greenlet==0.4.16", "gevent",
#"greenlet==0.4.16",
"greenlet",
"psycogreen", "psycogreen",
"wubloader-common", "wubloader-common",
"python-dateutil", "python-dateutil",

@ -9,24 +9,122 @@ services:
networks: networks:
- default - default
- wubloader_default - wubloader_default
- traefik_network
labels:
- "traefik.docker.network=traefik_network"
- "traefik.http.routers.buscribe-router.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.rule=Host(`wubloader.raptorpond.com`)"
- "traefik.http.routers.buscribe-redirect.entrypoints=web"
- "traefik.http.routers.buscribe-router.tls=true"
- "traefik.http.routers.buscribe-router.tls.certresolver=leresolver"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.scheme=https"
- "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.permanent=true"
- "traefik.http.routers.buscribe-redirect.middlewares=buscribe-redirectscheme@docker"
restart: "on-failure" restart: "on-failure"
buscribelrr: # buscribelrr:
# image: buscribe:0.0.0
# command: [ "loadingreadyrun",
# "--start-time=2022-11-11T12:00:00Z",
# "--end-time=2022-11-20T22:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
# buscribedb:
# image: buscribe:0.0.0
# command: [ "desertbus",
# "--start-time=2023-11-10T12:00:00Z",
# "--end-time=2023-11-15T00:00:00Z",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
# "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
# volumes:
# - /srv/wubloader/segments:/mnt
buscribedb0:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-19T00:00:00Z",
"--end-time=2023-11-19T06:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb1:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T06:00:00Z",
"--end-time=2023-11-18T12:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb2:
image: buscribe:0.0.0
command: [ "desertbus",
"--start-time-override=2023-11-18T12:00:00Z",
"--end-time=2023-11-18T18:00:00Z",
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes:
- /srv/wubloader/segments:/mnt
buscribedb3:
image: buscribe:0.0.0 image: buscribe:0.0.0
command: [ "loadingreadyrun", command: [ "desertbus",
"--start-time='2021-11-05T00:00", "--start-time-override=2023-11-18T18:00:00Z",
"--end-time='2021-11-30T00:00", "--end-time=2023-11-19T00:00:00Z",
"--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr", "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ]
volumes: volumes:
- /srv/wubloader/segments:/mnt - /srv/wubloader/segments:/mnt
buscribeapilrr: # buscribeapilrr:
# image: buscribe-api:0.0.0
# command: [
# "loadingreadyrun",
# "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr",
# "--bustime-start=2023-11-11T22:00:00Z" ]
buscribeapidb:
image: buscribe-api:0.0.0 image: buscribe-api:0.0.0
command: [ command: [
"--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr", "desertbus",
"--bustime-start=2021-11-13T02:00:00" ] "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
volumes:
- /srv/wubloader/segments:/mnt
professorapidb:
image: professor-api:0.0.0
command: [
"--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db",
"--bustime-start=2023-11-11T22:00:00Z" ]
postgres:
image: postgres:13
ports:
- "7654:5432"
environment:
- POSTGRES_USER=vst
- POSTGRES_DB=postgres
- POSTGRES_PASSWORD=flnMSYPRf
volumes:
- /srv/buscribe/postgres:/var/lib/postgresql/data
restart: "unless-stopped"
postgres-prometheus:
image: quay.io/prometheuscommunity/postgres-exporter
ports:
- "9187:9187"
environment:
- DATA_SOURCE_NAME=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr?sslmode=disable
networks: networks:
wubloader_default: wubloader_default:
external: true external: true
traefik_network:
external: true

@ -49,8 +49,8 @@ http {
alias /usr/share/nginx/html/professor; alias /usr/share/nginx/html/professor;
} }
location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; } #location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; }
location /buscribe/desertbus { proxy_pass http://buscribeapidb:8010/buscribe; } location /buscribe/desertbus/json { proxy_pass http://buscribeapidb:8010/buscribe/json; }
location /professor/desertbus { proxy_pass http://professorapidb:8011/professor; } location /professor/desertbus { proxy_pass http://professorapidb:8011/professor; }
} }

@ -97,7 +97,8 @@ function fillLineInfo(line_json) {
function initializePlayer() { function initializePlayer() {
videojs.getPlayer("player").src([ videojs.getPlayer("player").src([
{src: `/professor/desertbus/line/${line_id}/playlist.m3u8`} //{src: `/professor/desertbus/line/${line_id}/playlist.m3u8`}
{src: `/playlist/desertbus/source.m3u8?start=${line.start_time}&end=${line.end_time}`}
]); ]);
videojs.getPlayer("player").addRemoteTextTrack({ videojs.getPlayer("player").addRemoteTextTrack({
kind: "captions", kind: "captions",

@ -0,0 +1,12 @@
#!/bin/bash
docker run \
--rm \
-v /srv/wubloader/segments/:/mnt/ \
buscribe:0.0.0 \
loadingreadyrun \
--start-time='2021-11-05T00:00' \
--end-time='2021-11-07T00:00' \
--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr \
--model=/usr/share/buscribe/vosk-model-en-us-0.22/
# --model=/usr/share/buscribe/vosk-model-small-en-us-0.15/
Loading…
Cancel
Save