From 28faff5c587ed263b79e743cfc1e2a4aa230cd08 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 20 Sep 2020 11:30:20 +1000 Subject: [PATCH 1/4] Add a k8s version of the docker-compose file Note this version is very simplified compared to the docker-compose and has some major limitations: * It relies on hostPath and a nodeSelector to put all the components on a shared storage node * It only supports use as a replication node (downloader, restreamer, backfiller, segment_coverage) * It uses the k8s Ingress instead of the built-in nginx for http routing. --- k8s.jsonnet | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 k8s.jsonnet diff --git a/k8s.jsonnet b/k8s.jsonnet new file mode 100644 index 0000000..8329eec --- /dev/null +++ b/k8s.jsonnet @@ -0,0 +1,231 @@ +// This is a jsonnet file, it generates kubernetes manifests. +// To generate and apply, run "jsonnet k8s.jsonnet | kubectl apply -f -" + +// Note this file is only set up to generate manifests for a basic replication node, +// for the sake of simplicity. + +{ + + config: { + // These are the important top-level settings. + // Change these to configure the services. + + // Image tag (application version) to use. + // Note: "latest" is not recommended in production, as you can't be sure what version + // you're actually running, and must manually re-pull to get an updated copy. + image_tag: "latest", + + // Twitch channels to capture. + // Channels suffixed with a '!' are considered "important" and will be retried more aggressively + // and warned about if they're not currently streaming. + channels: ["desertbus!", "db_chief", "db_high", "db_audio", "db_bus"], + + // Stream qualities to capture + qualities: ["source", "480p"], + + // The node selector and hostPath to use. All pods must be on the same host + // and use this hostpath in order to share the disk. + node_selector: {}, + host_path: "/var/lib/wubloader", + + // The local port within each container to bind the backdoor server on. + // You can exec into the container and telnet to this port to get a python shell. + backdoor_port: 1234, + + // Other nodes to always backfill from. You should not include the local node. + // If you are using the database to find peers, you should leave this empty. + peers: [ + ], + + // This node's name in the nodes table of the database + localhost: "node_name", + + // The hostname to use in the Ingress + ingress_host: "wubloader.example.com", + + // Connection args for the database. + // If database is defined in this config, host and port should be postgres:5432. + db_args: { + user: "vst", + password: "dbfh2019", // don't use default in production. Must not contain ' or \ as these are not escaped. + host: "postgres", + port: 5432, + dbname: "wubloader", + }, + + // The timestamp corresponding to 00:00 in bustime + bustime_start: "1970-01-01T00:00:00Z", + + // Extra options to pass via environment variables, + // eg. log level, disabling stack sampling. + env: { + // Uncomment this to set log level to debug + // WUBLOADER_LOG_LEVEL: "DEBUG", + // Uncomment this to disable stacksampling performance monitoring + // WUBLOADER_DISABLE_STACKSAMPLER: "true", + }, + + }, + + // A few derived values. + + // The connection string for the database. Constructed from db_args. + db_connect: std.join(" ", [ + "%s='%s'" % [key, $.config.db_args[key]] + for key in std.objectFields($.config.db_args) + ]), + + // Cleaned up version of $.channels without importance markers + clean_channels: [std.split(c, '!')[0] for c in $.config.channels], + + // k8s-formatted version of env dict + env_list: [ + {name: key, value: $.config.env[key]} + for key in std.objectFields($.config.env) + ], + + // This function generates deployments for each service, since they only differ slightly, + // with only a different image, CLI args and possibly env vars. + // The image name is derived from the component name + // (eg. "downloader" is quay.io/ekimekim/wubloader-downloader) + // so we only pass in name, args and env vars (with the latter two optional). + // Optional kwargs work just like python. + deployment(name, args=[], env=[]):: { + kind: "Deployment", + apiVersion: "apps/v1", + metadata: { + name: "wubloader-%s" % name, + labels: {app: "wubloader", component: name}, + }, + spec: { + replicas: 1, + selector: { + matchLabels: {app: "wubloader", component: name}, + }, + template: { + metadata: { + labels: {app: "wubloader", component: name}, + }, + spec: { + containers: [ + { + name: name, + image: "quay.io/ekimekim/wubloader-%s:%s" % [name, $.config.image_tag], + args: args, + volumeMounts: [{name: "data", mountPath: "/mnt"}], + env: $.env_list + env, // main env list combined with any deployment-specific ones + }, + ], + volumes: [ + { + name: "data", + hostPath: {path: $.config.host_path}, + }, + ], + nodeSelector: $.config.node_selector, + }, + }, + }, + }, + + // This function generates a Service object for each service, since they're basically identical. + service(name): { + kind: "Service", + apiVersion: "v1", + metadata: { + name: "wubloader-%s" % name, + labels: {app: "wubloader", component: name}, + }, + spec: { + selector: {app: "wubloader", component: name}, + ports: [{name: "http", port: 80, targetPort: 80}], + }, + }, + + // The actual manifests. + // These are all deployments. Note that all components work fine if multiple are running + // (they may duplicate work, but not cause errors by stepping on each others' toes). + manifests: [ + // The downloader watches the twitch stream and writes the HLS segments to disk + $.deployment("downloader", args=$.config.channels + [ + "--base-dir", "/mnt", + "--qualities", std.join(",", $.config.qualities), + "--backdoor-port", std.toString($.config.backdoor_port), + "--metrics-port", "80", + ]), + // The restreamer is a http server that fields requests for checking what segments exist + // and allows HLS streaming of segments from any requested timestamp + $.deployment("restreamer", args=[ + "--base-dir", "/mnt", + "--backdoor-port", std.toString($.config.backdoor_port), + "--port", "80", + ]), + // The backfiller periodically compares what segments exist locally to what exists on + // other nodes. If it finds ones it doesn't have, it downloads them. + // It can talk to the database to discover other wubloader nodes, or be given a static list. + $.deployment("backfiller", args=$.clean_channels + [ + "--base-dir", "/mnt", + "--qualities", std.join(",", $.config.qualities), + "--static-nodes", std.join(",", $.config.peers), + "--backdoor-port", std.toString($.config.backdoor_port), + "--node-database", $.db_connect, + "--localhost", $.config.localhost, + "--metrics-port", "80", + ]), + // Segment coverage is a monitoring helper that periodically scans available segments + // and reports stats. It also creates a "coverage map" image to represent this info. + // It puts this in the segment directory where nginx will serve it. + $.deployment("segment_coverage", args=$.clean_channels + [ + "--base-dir", "/mnt", + "--qualities", std.join(",", $.config.qualities), + "--metrics-port", "80", + ]), + // Normally nginx would be responsible for proxying requests to different services, + // but in k8s we can use Ingress to do that. However nginx is still needed to serve + // static content - segments as well as thrimbletrimmer. + $.deployment("nginx", env=[ + {name: "THRIMBLETRIMMER", value: "true"}, + {name: "SEGMENTS", value: "/mnt"}, + ]), + // Ingress to direct requests to the correct services. + { + kind: "Ingress", + apiVersion: "v1", + metadata: { + name: "wubloader", + labels: {app: "wubloader"}, + }, + spec: { + rules: [ + { + host: $.config.ingress_host, + http: { + // Helper functions for defining the path rules below + local rule(name, path, type) = { + path: path, + pathType: type, + backend: { + serviceName: "wubloader-%s" % name, + servicePort: 80, + }, + }, + local metric_rule(name) = rule(name, "/metrics/%s" % name, "Exact"), + paths: [ + // Map /metrics/NAME to each service (except restreamer) + metric_rule("downloader"), + metric_rule("backfiller"), + metric_rule("segment_coverage"), + // Map /segments and /thrimbletrimmer to the static content nginx + rule("nginx", "/segments", "Prefix"), + rule("nginx", "/thrimbletrimmer", "Prefix"), + // Map everything else to restreamer + rule("restreamer", "/", "Prefix"), + ], + }, + }, + ], + }, + }, + ], + +}.manifests // final output is just the manifest list, none of the other fields From 9a95dc44ebf72e12f922e6784cf119ca869fc106 Mon Sep 17 00:00:00 2001 From: HubbeKing Date: Sun, 20 Sep 2020 10:05:21 +0300 Subject: [PATCH 2/4] Fix k8s.jsonnet to compile to something kubernetes accepts Add Service definitions to output Correct apiVersion for Ingress definition Output a v1 List, as kubectl doesn't parse JSON arrays --- k8s.jsonnet | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/k8s.jsonnet b/k8s.jsonnet index 8329eec..2c55f32 100644 --- a/k8s.jsonnet +++ b/k8s.jsonnet @@ -5,8 +5,9 @@ // for the sake of simplicity. { - - config: { + kind: "List", + apiVersion: "v1", + config:: { // These are the important top-level settings. // Change these to configure the services. @@ -70,16 +71,16 @@ // A few derived values. // The connection string for the database. Constructed from db_args. - db_connect: std.join(" ", [ + db_connect:: std.join(" ", [ "%s='%s'" % [key, $.config.db_args[key]] for key in std.objectFields($.config.db_args) ]), // Cleaned up version of $.channels without importance markers - clean_channels: [std.split(c, '!')[0] for c in $.config.channels], + clean_channels:: [std.split(c, '!')[0] for c in $.config.channels], // k8s-formatted version of env dict - env_list: [ + env_list:: [ {name: key, value: $.config.env[key]} for key in std.objectFields($.config.env) ], @@ -129,7 +130,7 @@ }, // This function generates a Service object for each service, since they're basically identical. - service(name): { + service(name):: { kind: "Service", apiVersion: "v1", metadata: { @@ -145,7 +146,7 @@ // The actual manifests. // These are all deployments. Note that all components work fine if multiple are running // (they may duplicate work, but not cause errors by stepping on each others' toes). - manifests: [ + items: [ // The downloader watches the twitch stream and writes the HLS segments to disk $.deployment("downloader", args=$.config.channels + [ "--base-dir", "/mnt", @@ -187,10 +188,16 @@ {name: "THRIMBLETRIMMER", value: "true"}, {name: "SEGMENTS", value: "/mnt"}, ]), + // Services for all deployments + $.service("downloader"), + $.service("backfiller"), + $.service("nginx"), + $.service("restreamer"), + $.service("segment_coverage"), // Ingress to direct requests to the correct services. { kind: "Ingress", - apiVersion: "v1", + apiVersion: "networking.k8s.io/v1beta1", metadata: { name: "wubloader", labels: {app: "wubloader"}, @@ -228,4 +235,4 @@ }, ], -}.manifests // final output is just the manifest list, none of the other fields +} From b47eb3865fb089149a1ba8164859e7e685bf3f15 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 20 Sep 2020 17:15:19 +1000 Subject: [PATCH 3/4] Fix segment_coverage -> segment-coverage because _ is illegal in k8s names. However the image name does contain a _ so we replace - with _ when constructing the image name. --- k8s.jsonnet | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/k8s.jsonnet b/k8s.jsonnet index 2c55f32..e91bf3c 100644 --- a/k8s.jsonnet +++ b/k8s.jsonnet @@ -111,7 +111,8 @@ containers: [ { name: name, - image: "quay.io/ekimekim/wubloader-%s:%s" % [name, $.config.image_tag], + // segment-coverage is called segment_coverage in the image, so replace - with _ + image: "quay.io/ekimekim/wubloader-%s:%s" % [std.strReplace(name, "-", "_"), $.config.image_tag], args: args, volumeMounts: [{name: "data", mountPath: "/mnt"}], env: $.env_list + env, // main env list combined with any deployment-specific ones @@ -176,7 +177,7 @@ // Segment coverage is a monitoring helper that periodically scans available segments // and reports stats. It also creates a "coverage map" image to represent this info. // It puts this in the segment directory where nginx will serve it. - $.deployment("segment_coverage", args=$.clean_channels + [ + $.deployment("segment-coverage", args=$.clean_channels + [ "--base-dir", "/mnt", "--qualities", std.join(",", $.config.qualities), "--metrics-port", "80", @@ -193,7 +194,7 @@ $.service("backfiller"), $.service("nginx"), $.service("restreamer"), - $.service("segment_coverage"), + $.service("segment-coverage"), // Ingress to direct requests to the correct services. { kind: "Ingress", @@ -221,7 +222,7 @@ // Map /metrics/NAME to each service (except restreamer) metric_rule("downloader"), metric_rule("backfiller"), - metric_rule("segment_coverage"), + metric_rule("segment-coverage"), // Map /segments and /thrimbletrimmer to the static content nginx rule("nginx", "/segments", "Prefix"), rule("nginx", "/thrimbletrimmer", "Prefix"), From d2d457a45f49a0ce8142e3535b38e0c308b96034 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sat, 26 Sep 2020 07:45:05 +1000 Subject: [PATCH 4/4] Add k8s option to README --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index c910f2a..3b39948 100644 --- a/README.md +++ b/README.md @@ -30,4 +30,7 @@ A docker-compose file is provided to run all components. See `docker-compose.jso to set configuration options, then generate the compose file with `./generate-docker-compose`. Then run `docker-compose up`. +There is also a kubernetes-based option, but it is less configurable and only supports replication nodes. +See [k8s.jsonnet](./k8s.jsonnet) for details. + Further details of installing and configuring the backfiller are provided in [INSTALL.md](./INSTALL.md).