diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2bcdfd9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +models/ diff --git a/build.sh b/build.sh index d1dd20b..fd01d6c 100644 --- a/build.sh +++ b/build.sh @@ -2,7 +2,7 @@ VERSION=0.0.0 -bash fetch_models.sh +#bash fetch_models.sh docker build -f buscribe/Dockerfile -t buscribe:$VERSION . docker build -f buscribe-api/Dockerfile -t buscribe-api:$VERSION . diff --git a/buscribe/Dockerfile b/buscribe/Dockerfile index a6ed372..5469d36 100644 --- a/buscribe/Dockerfile +++ b/buscribe/Dockerfile @@ -1,9 +1,9 @@ -FROM debian:latest +FROM debian:11 RUN apt update &&\ apt install -y python3 libpq-dev python3-pip curl unzip ffmpeg -COPY ../common /tmp/common +COPY common /tmp/common RUN pip install /tmp/common && rm -r /tmp/common COPY buscribe /tmp/buscribe diff --git a/buscribe/Dockerfile.dockerignore b/buscribe/Dockerfile.dockerignore new file mode 100644 index 0000000..8cef4e6 --- /dev/null +++ b/buscribe/Dockerfile.dockerignore @@ -0,0 +1 @@ +!models/ diff --git a/buscribe/buscribe/main.py b/buscribe/buscribe/main.py index 4696f54..6b7b68a 100644 --- a/buscribe/buscribe/main.py +++ b/buscribe/buscribe/main.py @@ -27,13 +27,15 @@ from buscribe.recognizer import BuscribeRecognizer help='Start time of the transcript. Buscript will try to start reading 2 min before this time, if available, ' 'to prime the model. The transcripts for that time will not be written to the database. If not given ' 'transcription will start after last already transcribed line.') +@argh.arg('--start-time-override', + help='Ignore database and force override the start time.') @argh.arg('--end-time', help='End of transcript. If not given continues to transcribe live.') @argh.arg('--base-dir', help='Directory from which segments will be grabbed. 
Default is current working directory.') def main(channel, database="", base_dir=".", model="/usr/share/buscribe/vosk-model-en-us-0.21/", spk_model="/usr/share/buscribe/vosk-model-spk-0.4/", - start_time=None, end_time=None): + start_time=None, end_time=None, start_time_override=None): SAMPLE_RATE = 48000 segments_dir = os.path.join(base_dir, channel, "source") @@ -46,8 +48,11 @@ def main(channel, database="", base_dir=".", logging.info("Figuring out starting time...") db_start_time = get_end_of_transcript(db_cursor) - # Database start time takes priority - if db_start_time is not None: + # ~~Database start time takes priority~~ + # Override takes priority + if start_time_override is not None: + start_time = dateutil.parse(start_time_override) + elif db_start_time is not None: start_time = db_start_time elif start_time is not None: start_time = dateutil.parse(start_time) @@ -56,10 +61,13 @@ def main(channel, database="", base_dir=".", logging.error("Couldn't figure out start time!") db_conn.close() exit(1) + logging.info("Start time: {}".format(start_time)) if end_time is not None: end_time = dateutil.parse(end_time) + logging.info("End time: {}".format(end_time)) + logging.info("Loading models...") recognizer = BuscribeRecognizer(SAMPLE_RATE, model, spk_model) logging.info("Models loaded.") diff --git a/buscribe/setup.py b/buscribe/setup.py index 9b427b1..877a98a 100644 --- a/buscribe/setup.py +++ b/buscribe/setup.py @@ -7,8 +7,10 @@ setup( install_requires = [ "argh", "psycopg2", - "gevent==1.5a2", - "greenlet==0.4.16", + #"gevent==1.5a2", + "gevent", + #"greenlet==0.4.16", + "greenlet", "psycogreen", "wubloader-common", "python-dateutil", diff --git a/docker-compose.yml b/docker-compose.yml index b80a0b5..d76cb35 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,24 +9,122 @@ services: networks: - default - wubloader_default + - traefik_network + labels: + - "traefik.docker.network=traefik_network" + - 
"traefik.http.routers.buscribe-router.rule=Host(`wubloader.raptorpond.com`)" + - "traefik.http.routers.buscribe-redirect.rule=Host(`wubloader.raptorpond.com`)" + - "traefik.http.routers.buscribe-redirect.entrypoints=web" + - "traefik.http.routers.buscribe-router.tls=true" + - "traefik.http.routers.buscribe-router.tls.certresolver=leresolver" + - "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.scheme=https" + - "traefik.http.middlewares.buscribe-redirectscheme.redirectscheme.permanent=true" + - "traefik.http.routers.buscribe-redirect.middlewares=buscribe-redirectscheme@docker" restart: "on-failure" - buscribelrr: + # buscribelrr: + # image: buscribe:0.0.0 + # command: [ "loadingreadyrun", + # "--start-time=2022-11-11T12:00:00Z", + # "--end-time=2022-11-20T22:00:00Z", + # "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr", + # "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] + # volumes: + # - /srv/wubloader/segments:/mnt + + # buscribedb: + # image: buscribe:0.0.0 + # command: [ "desertbus", + # "--start-time=2023-11-10T12:00:00Z", + # "--end-time=2023-11-15T00:00:00Z", + # "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + # "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] + # volumes: + # - /srv/wubloader/segments:/mnt + + buscribedb0: + image: buscribe:0.0.0 + command: [ "desertbus", + "--start-time-override=2023-11-19T00:00:00Z", + "--end-time=2023-11-19T06:00:00Z", + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] + volumes: + - /srv/wubloader/segments:/mnt + + buscribedb1: + image: buscribe:0.0.0 + command: [ "desertbus", + "--start-time-override=2023-11-18T06:00:00Z", + "--end-time=2023-11-18T12:00:00Z", + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] + volumes: + - /srv/wubloader/segments:/mnt + + buscribedb2: + image: buscribe:0.0.0 + command: 
[ "desertbus", + "--start-time-override=2023-11-18T12:00:00Z", + "--end-time=2023-11-18T18:00:00Z", + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] + volumes: + - /srv/wubloader/segments:/mnt + + buscribedb3: image: buscribe:0.0.0 - command: [ "loadingreadyrun", - "--start-time='2021-11-05T00:00", - "--end-time='2021-11-30T00:00", - "--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr", + command: [ "desertbus", + "--start-time-override=2023-11-18T18:00:00Z", + "--end-time=2023-11-19T00:00:00Z", + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", "--model=/usr/share/buscribe/vosk-model-en-us-0.22/" ] volumes: - /srv/wubloader/segments:/mnt - buscribeapilrr: + # buscribeapilrr: + # image: buscribe-api:0.0.0 + # command: [ + # "loadingreadyrun", + # "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr", + # "--bustime-start=2023-11-11T22:00:00Z" ] + + buscribeapidb: image: buscribe-api:0.0.0 command: [ - "--database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr", - "--bustime-start=2021-11-13T02:00:00" ] + "desertbus", + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + "--bustime-start=2023-11-11T22:00:00Z" ] + volumes: + - /srv/wubloader/segments:/mnt + + professorapidb: + image: professor-api:0.0.0 + command: [ + "--database=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_db", + "--bustime-start=2023-11-11T22:00:00Z" ] + + postgres: + image: postgres:13 + ports: + - "7654:5432" + environment: + - POSTGRES_USER=vst + - POSTGRES_DB=postgres + - POSTGRES_PASSWORD=flnMSYPRf + volumes: + - /srv/buscribe/postgres:/var/lib/postgresql/data + restart: "unless-stopped" + + postgres-prometheus: + image: quay.io/prometheuscommunity/postgres-exporter + ports: + - "9187:9187" + environment: + - DATA_SOURCE_NAME=postgresql://vst:flnMSYPRf@postgres:5432/buscribe_lrr?sslmode=disable networks: wubloader_default: external: true + 
traefik_network: + external: true diff --git a/nginx/nginx.conf b/nginx/nginx.conf index 610cb18..178b6e3 100644 --- a/nginx/nginx.conf +++ b/nginx/nginx.conf @@ -49,8 +49,8 @@ http { alias /usr/share/nginx/html/professor; } - location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; } - location /buscribe/desertbus { proxy_pass http://buscribeapidb:8010/buscribe; } + #location /buscribe/loadingreadyrun/json { proxy_pass http://buscribeapilrr:8010/buscribe/json; } + location /buscribe/desertbus/json { proxy_pass http://buscribeapidb:8010/buscribe/json; } location /professor/desertbus { proxy_pass http://professorapidb:8011/professor; } } diff --git a/professor/script.js b/professor/script.js index 8a50619..af1d561 100644 --- a/professor/script.js +++ b/professor/script.js @@ -97,7 +97,8 @@ function fillLineInfo(line_json) { function initializePlayer() { videojs.getPlayer("player").src([ - {src: `/professor/desertbus/line/${line_id}/playlist.m3u8`} + //{src: `/professor/desertbus/line/${line_id}/playlist.m3u8`} + {src: `/playlist/desertbus/source.m3u8?start=${line.start_time}&end=${line.end_time}`} ]); videojs.getPlayer("player").addRemoteTextTrack({ kind: "captions", @@ -219,4 +220,4 @@ function parseJwt(token) { }).join('')); return JSON.parse(jsonPayload); -} \ No newline at end of file +} diff --git a/test_buscribe_container.sh b/test_buscribe_container.sh new file mode 100755 index 0000000..6702a97 --- /dev/null +++ b/test_buscribe_container.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +docker run \ + --rm \ + -v /srv/wubloader/segments/:/mnt/ \ + buscribe:0.0.0 \ + loadingreadyrun \ + --start-time='2021-11-05T00:00' \ + --end-time='2021-11-07T00:00' \ + --database=postgresql://vst:flnMSYPRf@mula.lan:6543/buscribe_lrr \ + --model=/usr/share/buscribe/vosk-model-en-us-0.22/ +# --model=/usr/share/buscribe/vosk-model-small-en-us-0.15/