From 57962c8104f7376b8b38504e13e423e5b50dfe05 Mon Sep 17 00:00:00 2001 From: Christopher Usher Date: Mon, 23 Sep 2019 01:22:27 +0100 Subject: [PATCH] allow nodes and editors to be read from csv files on start up --- INSTALL.md | 61 +++++++++++++++++++++++++++++++++++---- docker-compose.jsonnet | 4 +-- postgres/setup.sh | 40 +++++++++++++++++++------ postgres/standby_setup.sh | 12 ++++---- 4 files changed, 93 insertions(+), 24 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index b0b1cb9..40b7d08 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -32,9 +32,9 @@ Alternatively if you have `git` installed you can clone the git repository: You can edit the `docker-compose.jsonnet` file to set the configuration options. Important options include: -* `channel`, the Twitch channel to capture from -* `segments_path`, the local path to save segments to -* `db_args`, the arguments for connecting to the wubloader database +* `channel`, the Twitch channel to capture from. +* `segments_path`, the local path to save segments to. +* `db_args`, the arguments for connecting to the wubloader database. You will likely need to update the `user`, `password` and `host` to match the database node that you are connecting to. * `ports`, the ports to expose each service on. Only the `nginx` port (default on port 80) needs to be externally accessible for a non-database node as all the other services are routed through `nginx`. To generate the `docker-compose.yml` file used by `docker-compose`, run `generate-docker-compose` @@ -43,7 +43,20 @@ To generate the `docker-compose.yml` file used by `docker-compose`, run `generat After making any changes to `docker-compose.jsonnet`, you will need to rerun `generate-docker-compose`. -By default the `downloader`, `restreamer`, `backfiller`, `cutter`, `thrimshim` and `nginx` services of the wubloader will be run. To change which services are run edit the `enabled` object in `docker-compose.jsonnet`. 
A complete wubloader set up also requires one and only one `database` service (though having a backup database is a good idea) and one and only one `sheetsync` service. TODO: explain how to setup database +By default the `downloader`, `restreamer`, `backfiller`, `cutter`, `thrimshim` and `nginx` services of the wubloader will be run. To change which services are run edit the `enabled` object in `docker-compose.jsonnet`. A complete wubloader set up also requires one and only one `database` service (though having a backup database is a good idea) and one and only one `sheetsync` service. + +### Database setup + +When setting up a database node, a number of database specific options can be set. + +* `database_path`, the local path to save the database to. If this directory is empty then the database setup scripts will be run to create a new database. Otherwise, the database container will load the database stored in this folder. +* `db_args.user`, `db_args.password`, the username and password for the database user that the rest of the wubloader will connect to. +* `db_super_user`, `db_super_password`, the username and password for the database superuser that is only accessible from the local machine. +* `db_replication_user`, `db_replication_password`, the username and password for the database user other nodes can connect as to replicate the database. If `db_replication_user` is an empty string, remote replication will be disabled. +* `db_standby`, if true, this database node will replicate the database node given by `db_args.host`. + +It is recommended that the passwords be changed from the defaults in production. +A database node needs to expose its database on a port. By default this is `5432` but the port exposed to the outside can be changed in the `ports` object. 
## Running the wubloader @@ -54,5 +67,41 @@ To start the wubloader, simply run To stop the wubloader and clean up, simply run `docker-compose down` - -To backfill from a node, the other nodes need to know about it. The best way to do this is to add the node to the database's nodes table. + +## Database setup + +When setting up a database node, a number of database specific options can be set. + +* `database_path`, the local path to save the database to. If this directory is empty then the database setup scripts will be run to create a new database. Otherwise, the database container will load the database stored in this folder. +* `db_args.user`, `db_args.password`, the username and password for the database user that the rest of the wubloader will connect to. +* `db_super_user`, `db_super_password`, the username and password for the database superuser that is only accessible from the local machine. +* `db_replication_user`, `db_replication_password`, the username and password for the database user other nodes can connect as to replicate the database. If `db_replication_user` is an empty string, remote replication will be disabled. +* `db_standby`, if true, this database node will replicate the database node given by `db_args.host`. + +It is recommended that the passwords be changed from the defaults in production. +A database node needs to expose its database on a port. By default this is `5432` but the port exposed to the outside can be changed in the `ports` object. + +The `events` table will be automatically populated by `sheetsync`. The startup script will attempt to populate the `nodes` and `editors` tables from the `nodes.csv` and `editors.csv` files in the `segments_path` directory. 
The expected format for these files is: + +``` +nodes.csv + +name,url,backfill_from +example,http://example.com,TRUE +``` + +``` +editors.csv + +name,email +example,example@gmail.com +``` + +Alternatively, nodes can be added manually to the database's `nodes` table: + +`wubloader=> INSERT INTO nodes (name, url) VALUES ('example_name', 'http://example.com');` + +and editors to the database's `editors` table: + +`wubloader=> INSERT INTO editors (name, email) VALUES ('example', 'example@gmail.com');` + diff --git a/docker-compose.jsonnet b/docker-compose.jsonnet index 2a10702..8b903b3 100644 --- a/docker-compose.jsonnet +++ b/docker-compose.jsonnet @@ -247,14 +247,14 @@ POSTGRES_USER: $.db_super_user, POSTGRES_PASSWORD: $.db_super_password, POSTGRES_DB: $.db_args.dbname, - PGDATA: "/mnt", + PGDATA: "/mnt/database", WUBLOADER_USER: $.db_args.user, WUBLOADER_PASSWORD: $.db_args.password, REPLICATION_USER: $.db_replication_user, REPLICATION_PASSWORD: $.db_replication_password, MASTER_NODE: $.db_args.host, }, - volumes: ["%s:/mnt" % $.database_path], + volumes: ["%s:/mnt/database" % $.database_path, "%s:/mnt/wubloader" % $.segments_path], [if $.db_standby then "command"]: ["/standby_setup.sh"], }, diff --git a/postgres/setup.sh b/postgres/setup.sh index 4c0553f..79cd16c 100644 --- a/postgres/setup.sh +++ b/postgres/setup.sh @@ -18,18 +18,40 @@ if [ -n "$REPLICATION_USER" ]; then echo "host replication $REPLICATION_USER all md5" >> "$PGDATA/pg_hba.conf" psql -v ON_ERROR_STOP=1 -U postgres <<-EOSQL -CREATE USER $REPLICATION_USER LOGIN REPLICATION PASSWORD '$REPLICATION_PASSWORD'; + CREATE USER $REPLICATION_USER LOGIN REPLICATION PASSWORD '$REPLICATION_PASSWORD'; -EOSQL + EOSQL - cat >> ${PGDATA}/postgresql.conf <> ${PGDATA}/postgresql.conf <<-EOF + wal_level = replica + archive_mode = on + archive_command = 'cd .' + max_wal_senders = 8 + wal_keep_segments = 8 + EOF -wal_level = replica -archive_mode = on -archive_command = 'cd .' 
-max_wal_senders = 8 -wal_keep_segments = 8 +fi -EOF +if [ -a /mnt/wubloader/nodes.csv ]; then + echo "Loading nodes from nodes.csv" + psql -v -U postgres -d ${POSTGRES_DB} <<-EOF + CREATE TABLE IF NOT EXISTS nodes ( + name TEXT PRIMARY KEY, + url TEXT NOT NULL, + backfill_from BOOLEAN NOT NULL DEFAULT TRUE); + COPY nodes FROM '/mnt/wubloader/nodes.csv' DELIMITER ',' CSV HEADER; + ALTER TABLE nodes OWNER TO vst; + EOF +fi +if [ -a /mnt/wubloader/editors.csv ]; then + echo "Loading editors from editors.csv" + psql -v -U postgres -d ${POSTGRES_DB} <<-EOF + CREATE TABLE IF NOT EXISTS editors ( + email TEXT PRIMARY KEY, + name TEXT NOT NULL); + COPY editors FROM '/mnt/wubloader/editors.csv' DELIMITER ',' CSV HEADER; + ALTER TABLE editors OWNER TO vst; + EOF fi + diff --git a/postgres/standby_setup.sh b/postgres/standby_setup.sh index fee9828..da197b6 100644 --- a/postgres/standby_setup.sh +++ b/postgres/standby_setup.sh @@ -7,13 +7,11 @@ if [ ! -s "$PGDATA/PG_VERSION" ]; then set -e - cat > ${PGDATA}/recovery.conf < ${PGDATA}/recovery.conf <<-EOF + standby_mode = on + primary_conninfo = 'host=$MASTER_NODE password=$REPLICATION_PASSWORD port=5432 user=$REPLICATION_USER' + trigger_file = '/tmp/touch_to_promote_to_master' + EOF chown postgres. ${PGDATA} -R chmod 700 ${PGDATA} -R