// This is a jsonnet file, it generates kubernetes manifests.
// To generate and apply, run "jsonnet k8s.jsonnet | kubectl apply -f -"

// Note that this file is currently not as advanced as its docker-compose variant
// This file can only be used for replication nodes and editing nodes
// see config.enabled for more info on what components can be used

{
  kind: "List",
  apiVersion: "v1",

  config:: {
    // These are the important top-level settings.
    // Change these to configure the services.

    // Image tag (application version) to use.
    // Note: "latest" is not recommended in production, as you can't be sure what version
    // you're actually running, and must manually re-pull to get an updated copy.
    image_tag: "latest",
    image_base: "ghcr.io/dbvideostriketeam", // Change this to use images from a different source than the main one

    // image tag for postgres, which changes less
    // postgres shouldn't be restarted unless absolutely necessary
    database_tag: "bb05e37",

    // For each component, whether to deploy that component.
    enabled: {
      downloader: true, # fetching segments from twitch.tv
      restreamer: true, # serving segments for other wubloader nodes and/or thrimbletrimmer editor interface
      backfiller: true, # fetching segments from other wubloader nodes
      cutter: false, # performing cuts based on editor input
      sheetsync: false, # syncing google sheets and postgres
      thrimshim: false, # storing editor inputs in postgres
      segment_coverage: true, # generating segment coverage graphs
      playlist_manager: false, # auto-populating youtube playlists
      nginx: true, # proxying between the various pods
      postgres: false, # source-of-truth database
      chat_archiver: true, # records twitch chat messages and merges them with records from other nodes
    },

    // Twitch channels to capture.
    // Channels suffixed with a '!' are considered "important" and will be retried more aggressively
    // and warned about if they're not currently streaming.
    channels: ["desertbus!", "db_chief", "db_high", "db_audio", "db_bus"],

    backfill_only_channels: [],

    // extra directories to backfill
    backfill_dirs: ["emotes"],

    // Cleaned up version of $.config.channels without importance/type markers,
    // plus the backfill-only channels.
    // General form of a channel entry is CHANNEL[!][:TYPE:URL].
    clean_channels: [std.split(std.split(c, ":")[0], '!')[0] for c in $.config.channels] + $.config.backfill_only_channels,

    // Stream qualities to capture
    qualities: ["source", "480p"],

    // NFS settings for RWX (ReadWriteMany) volume for wubloader pods
    nfs_server: "nfs.example.com", # server IP or hostname
    nfs_path: "/mnt/segments", # path on server to mount
    nfs_capacity: "1T", # storage capacity to report to k8s

    # mount options to use (It is important to test these and adjust for optimal performance)
    # these options work reasonably well on a ZFS-backed NFS server with the default 128k block size
    nfs_mount_options: [
      "fsc", # use FS-Cache to cache file data
      "noatime", # don't update inode access times
      "nodiratime", # don't update directory inode access times
      "vers=4", # use NFSv4
      "proto=tcp", # use TCP (default for NFSv4)
      "hard", # retry NFS requests indefinitely
      "rsize=131072", # 128kb read size
      "wsize=131072", # 128kb write size
    ],

    // PVC template storage class for statefulset in postgres
    sts_storage_class_name: "longhorn",

    // Other nodes to always backfill from. You should not include the local node.
    // If you are using the database to find peers, you should leave this empty.
    peers: [
    ],

    // This node's name in the nodes table of the database
    localhost: "node_name",

    // The hostname to use in the Ingress
    ingress_host: "wubloader.example.com",

    // Set to true to let the ingress handle TLS
    ingress_tls: true,

    // Ingress class for ingress
    ingress_class_name: "nginx",

    // Uncomment and give a secretName for ingress, if required for ingress TLS
    //ingress_secret_name: "wubloader-tls",

    // Additional metadata labels for Ingress (cert-manager, etc.) - adjust as needed for your setup
    ingress_labels: {},

    // Connection args for the database.
    // If the database is deployed by this config (enabled.postgres), host and port should be
    // postgres:5432, matching the "postgres" Service generated below.
    db_args: {
      user: "vst",
      password: "dbfh2019", // don't use default in production. Must not contain ' or \ as these are not escaped.
      host: "postgres",
      port: 5432,
      dbname: "wubloader",
    },

    // Other database arguments
    db_super_user: "postgres", // only accessible from localhost
    db_super_password: "postgres", // Must not contain ' or \ as these are not escaped.
    db_replication_user: "replicate", // if empty, don't allow replication
    db_replication_password: "standby", // don't use default in production. Must not contain ' or \ as these are not escaped.
    db_readonly_user: "vst-ro", // if empty, don't have a readonly account
    db_readonly_password: "volunteer", // don't use default in production. Must not contain ' or \ as these are not escaped.
    db_standby: false, // set to true to have this database replicate another server

    // Google credentials for cutter, imported from a JSON file with keys
    // 'client_id', 'client_secret', and 'refresh_token'.
    cutter_creds: import "./google_creds.json",

    // Google credentials for sheetsync, imported from a JSON file with keys
    // 'client_id', 'client_secret' and 'refresh_token'.
    // May be the same file as cutter_creds.
    sheetsync_creds: import "./google_creds.json",

    // The twitch OAuth token to use when downloading streams.
    // This is optional (null to omit) but may be helpful to bypass ads.
    // Despite the name, the value is embedded verbatim into the "credentials" Secret,
    // so it must be the token content, not a path
    // (eg. importstr "./downloader_token.txt").
    downloader_creds_file: null,

    // The URL to write to the sheet for edit links, with {} being replaced by the id
    edit_url: "https://wubloader.example.com/edit.html?id={}",

    // The spreadsheet ID and worksheet names for sheetsync to act on
    sheet_id: "your_id_here",
    worksheets: ["Tech Test & Preshow"] + ["Day %d" % n for n in std.range(1, 8)],
    playlist_worksheet: "Tags",

    // The archive worksheet, if given, points to a worksheet containing events with a different
    // schema and alternate behaviour suitable for long-term archival videos instead of uploads.
    archive_worksheet: "Video Trim Times",

    // Fixed tags to add to all videos
    video_tags: ["DB17", "DB2023", "2023", "Desert Bus", "Desert Bus for Hope", "Child's Play Charity", "Child's Play", "Charity Fundraiser"],

    // The timestamp corresponding to 00:00 in bustime
    bustime_start: "1970-01-01T00:00:00Z",

    // The timestamps to start/end segment coverage maps at.
    // Generally 1 day before and 7 days after bus start.
    coverage_start: "1969-12-31T00:00:00Z",
    coverage_end: "1970-01-07T00:00:00Z",

    // Max hours ago to backfill, ie. do not backfill for times before this many hours ago.
    // Set to null to disable.
    backfill_max_hours_ago: 24 * 14, // approx 14 days

    // Extra options to pass via environment variables,
    // eg. log level, disabling stack sampling.
    env: {
      // Uncomment this to set log level to debug
      // WUBLOADER_LOG_LEVEL: "DEBUG",
      // Uncomment this to enable stacksampling performance monitoring
      // WUBLOADER_ENABLE_STACKSAMPLER: "true",
    },

    // A map from youtube playlist IDs to a list of tags.
    // Playlist manager will populate each playlist with all videos which have all those tags.
    // For example, tags ["Day 1", "Technical"] will populate the playlist with all Technical
    // youtube videos from Day 1.
    // Note that you can make an "all videos" playlist by specifying no tags (ie. []).
    playlists: {
      // Replaced entirely by tags sheet
    },

    // Which upload locations should be added to playlists
    youtube_upload_locations: [
      "desertbus",
      "desertbus_slow",
      "desertbus_emergency",
      "youtube-manual",
    ],

    // Config for cutter upload locations. See cutter docs for full detail.
    cutter_config: {
      // Default
      desertbus: {type: "youtube", cut_type: "smart"},
      // Backup options for advanced use, if the smart cut breaks things.
      desertbus_slow: {type: "youtube", cut_type: "full"},
      desertbus_emergency: {type: "youtube", cut_type: "fast"},
    },
    default_location: "desertbus",
    // archive location is the default location for archive events,
    // only relevant if archive_worksheet is set.
    archive_location: "archive",

    // The header to put at the front of video titles, eg. a video with a title
    // of "hello world" with title header "foo" becomes: "foo - hello world".
    title_header: "DB2023",

    // The footer to put at the bottom of descriptions, in its own paragraph
    description_footer: "Uploaded by the Desert Bus Video Strike Team",

    // Chat archiver settings
    chat_archiver: {
      // Twitch user to log in as, and the content of their oauth token
      user: "dbvideostriketeam",
      token: importstr "./chat_token.txt",
      // Whether to enable backfilling of chat archives to this node (if backfiller enabled)
      backfill: true,
      // Channels to watch. Defaults to "all twitch channels in config.channels"
      // (ie. entries without a :TYPE:URL suffix) but you can add extras.
      channels: [
        std.split(c, '!')[0]
        for c in $.config.channels
        if std.length(std.split(c, ":")) == 1
      ],
    },
  },

  // A few derived values.

  // The connection string for the database. Constructed from db_args.
  // Values are wrapped in single quotes but NOT escaped, hence the restriction
  // on ' and \ in the passwords above.
  db_connect:: std.join(" ", [
    "%s='%s'" % [key, $.config.db_args[key]]
    for key in std.objectFields($.config.db_args)
  ]),

  // Legacy: version of config.channels with only importance markers stripped.
  // Not referenced by the components below, which use $.config.clean_channels
  // (that one also strips :TYPE:URL and adds backfill-only channels).
  clean_channels:: [std.split(c, '!')[0] for c in $.config.channels],

  // k8s-formatted version of env dict
  env_list:: [
    {name: key, value: $.config.env[key]}
    for key in std.objectFields($.config.env)
  ],

  // Legacy: upload locations in cutter_config which have type youtube.
  // Not referenced by the components below; playlist_manager uses
  // $.config.youtube_upload_locations instead.
  youtube_upload_locations:: [
    location
    for location in std.objectFields($.config.cutter_config)
    if $.config.cutter_config[location].type == "youtube"
  ],

  // This function generates deployments for each service, since they only differ slightly,
  // with only a different image, CLI args and possibly env vars.
  // The image name is derived from the component name
  // (eg. "downloader" is ghcr.io/dbvideostriketeam/wubloader-downloader)
  // so we only pass in name as a required arg.
  // Optional kwargs work just like python:
  //   args:         container args (CLI arguments)
  //   env:          extra env entries, appended after $.env_list
  //   volumes:      extra pod volumes, appended after the "data" volume (the "segments" PVC)
  //   volumeMounts: extra container mounts, appended after the "data" mount at /mnt
  //   resources:    container resources object
  deployment(name, args=[], env=[], volumes=[], volumeMounts=[], resources={}):: {
    kind: "Deployment",
    apiVersion: "apps/v1",
    metadata: {
      namespace: "wubloader",
      name: name,
      labels: {app: "wubloader", component: name},
    },
    spec: {
      replicas: 1,
      selector: {
        matchLabels: {app: "wubloader", component: name},
      },
      template: {
        metadata: {
          labels: {app: "wubloader", component: name},
        },
        spec: {
          containers: [
            {
              name: name,
              // segment-coverage is called segment_coverage in the image, so replace - with _
              // ditto for playlist-manager
              image: "%s/wubloader-%s:%s" % [$.config.image_base, std.strReplace(name, "-", "_"), $.config.image_tag],
              args: args,
              resources: resources,
              volumeMounts: [{name: "data", mountPath: "/mnt"}] + volumeMounts,
              env: $.env_list + env, // main env list combined with any deployment-specific ones
            },
          ],
          volumes: [
            {
              name: "data",
              persistentVolumeClaim: {"claimName": "segments"},
            },
          ] + volumes
        },
      },
    },
  },

  // This function generates a Service object for each service.
  // The "postgres" service exposes port 5432; every other service exposes http on port 80.
  service(name):: {
    kind: "Service",
    apiVersion: "v1",
    metadata: {
      namespace: "wubloader",
      name: name,
      labels: {app: "wubloader", component: name},
    },
    spec: {
      selector: {app: "wubloader", component: name},
      ports:
        if name == "postgres" then [{name: "postgres", port: 5432, targetPort: 5432}]
        else [{name: "http", port: 80, targetPort: 80}],
    },
  },

  // This function generates a StatefulSet object (for postgres).
  // Unlike deployment(), the image uses config.database_tag rather than config.image_tag.
  //   args: container args (CLI arguments)
  //   env:  extra env entries, appended after $.env_list
  statefulset(name, args=[], env=[]):: {
    kind: "StatefulSet",
    apiVersion: "apps/v1",
    metadata: {
      namespace: "wubloader",
      name: name,
      labels: {app: "wubloader", component: name},
    },
    spec: {
      replicas: 1,
      selector: {
        matchLabels: {app: "wubloader", component: name},
      },
      serviceName: name,
      template: {
        metadata: {
          labels: {app: "wubloader", component: name},
        },
        spec: {
          containers: [
            {
              name: name,
              image: "%s/wubloader-%s:%s" % [$.config.image_base, name, $.config.database_tag],
              args: args,
              env: $.env_list + env, // main env list combined with any statefulset-specific ones
              volumeMounts: [
                // use a subfolder in the newly provisioned PVC to store the postgres DB
                // a newly provisioned ext4 PVC will be non-empty, so postgres fails to start if we don't use a subfolder
                {name: "database", mountPath: "/mnt/database", subPath: "postgres"},
                {name: "segments", mountPath: "/mnt/wubloader"}
              ],
            },
          ],
          volumes: [
            {
              name: "segments",
              persistentVolumeClaim: {"claimName": "segments"},
            },
          ],
        },
      },
      volumeClaimTemplates: [
        {
          metadata: {
            namespace: "wubloader",
            name: "database"
          },
          spec: {
            accessModes: ["ReadWriteOnce"],
            resources: {
              requests: {
                // Kubernetes quantities use the binary suffix "Gi"; "GiB" is rejected by the API server.
                storage: "50Gi"
              },
            },
            storageClassName: $.config.sts_storage_class_name
          },
        },
      ],
    },
  },

  // The actual manifests to output, filtering out "null" from disabled components.
  items: [comp for comp in $.components if comp != null],

  // These are all the deployments and services.
  // Note that all components work fine if multiple are running
  // (they may duplicate work, but not cause errors by stepping on each others' toes).
  // A disabled component's `if` has no else branch and so evaluates to null,
  // which `items` above filters out.
  components:: [

    // A namespace where all the things go
    {
      "apiVersion": "v1",
      "kind": "Namespace",
      "metadata": {
        "name": "wubloader"
      },
    },

    // The downloader watches the twitch stream and writes the HLS segments to disk
    if $.config.enabled.downloader then $.deployment(
      "downloader",
      args=$.config.channels + [
        "--base-dir", "/mnt",
        "--qualities", std.join(",", $.config.qualities),
        "--metrics-port", "80",
      ] + (
        // Only pass an auth file if a token was configured
        if $.config.downloader_creds_file != null
        then ["--auth-file", "/etc/creds/downloader_token.txt"]
        else []
      ),
      volumes=[
        {name: "credentials", secret: {secretName: "credentials"}}
      ],
      volumeMounts=[
        {mountPath: "/etc/creds", name: "credentials"},
      ]
    ),

    // The restreamer is a http server that fields requests for checking what segments exist
    // and allows HLS streaming of segments from any requested timestamp
    if $.config.enabled.restreamer then $.deployment("restreamer", args=[
      "--base-dir", "/mnt",
      "--port", "80",
    ]),

    // The backfiller periodically compares what segments exist locally to what exists on
    // other nodes. If it finds ones it doesn't have, it downloads them.
    // It can talk to the database to discover other wubloader nodes, or be given a static list.
    if $.config.enabled.backfiller then $.deployment("backfiller", args=$.config.clean_channels + [
      "--base-dir", "/mnt",
      "--qualities", std.join(",", $.config.qualities + (if $.config.chat_archiver.backfill then ["chat"] else [])),
      "--extras", std.join(",", $.config.backfill_dirs),
      "--static-nodes", std.join(",", $.config.peers),
      "--node-database", $.db_connect,
      "--localhost", $.config.localhost,
      "--metrics-port", "80",
    ] + (if $.config.backfill_max_hours_ago == null then [] else [
      "--start", std.toString($.config.backfill_max_hours_ago),
    ])),

    // Segment coverage is a monitoring helper that periodically scans available segments
    // and reports stats. It also creates a "coverage map" image to represent this info.
    // It puts this in the segment directory where nginx will serve it.
    if $.config.enabled.segment_coverage then $.deployment("segment-coverage", args=$.config.clean_channels + [
      "--base-dir", "/mnt",
      "--qualities", std.join(",", $.config.qualities),
      "--metrics-port", "80",
      "--first-hour", $.config.coverage_start,
      "--last-hour", $.config.coverage_end,
      // Render a html page showing all the images from all nodes
      "--make-page",
      "--connection-string", $.db_connect,
    ]),

    // Thrimshim acts as an interface between the thrimbletrimmer editor and the database
    // It is needed for thrimbletrimmer to be able to get unedited videos and submit edits
    if $.config.enabled.thrimshim then $.deployment("thrimshim", args=[
      "--port", "80",
      "--title-header", $.config.title_header,
      "--description-footer", $.config.description_footer,
      // Default location first, then every other cutter_config location
      "--upload-locations", std.join(",", [$.config.default_location] + [
        location
        for location in std.objectFields($.config.cutter_config)
        if location != $.config.default_location
      ]),
      $.db_connect,
      $.config.clean_channels[0], // use first element as default channel
      $.config.bustime_start,
    ]),

    // Cutter interacts with the database to perform cutting jobs
    if $.config.enabled.cutter then $.deployment(
      "cutter",
      args=[
        "--base-dir", "/mnt",
        "--metrics-port", "80",
        "--name", $.config.localhost,
        "--tags", std.join(",", $.config.video_tags),
        $.db_connect,
        std.manifestJson($.config.cutter_config),
        "/etc/creds/cutter_creds.json"
      ],
      volumes=[
        {name: "credentials", secret: {secretName: "credentials"}}
      ],
      volumeMounts=[
        {mountPath: "/etc/creds", name: "credentials"},
      ]
    ),

    // Sheetsync syncs database columns to the google docs sheet which is the primary operator interface
    if $.config.enabled.sheetsync then $.deployment(
      "sheetsync",
      args=[
        "--allocate-ids",
        "--metrics-port", "80",
        // db_connect is a derived top-level value, it is not part of config
        $.db_connect,
        "/etc/creds/sheetsync_creds.json",
        $.config.edit_url,
        $.config.bustime_start,
        $.config.sheet_id
      ] + $.config.worksheets,
      volumes=[
        {name: "credentials", secret: {secretName: "credentials"}}
      ],
      volumeMounts=[
        {mountPath: "/etc/creds", name: "credentials"},
      ]
    ),

    // playlist_manager adds videos to youtube playlists depending on tags
    if $.config.enabled.playlist_manager then $.deployment(
      "playlist-manager",
      args=[
        "--metrics-port", "80",
        "--upload-location-allowlist", std.join(",", $.config.youtube_upload_locations),
        // db_connect is a derived top-level value, it is not part of config
        $.db_connect,
        "/etc/creds/cutter_creds.json"
      ] + [
        // One PLAYLIST_ID=tag1,tag2,... argument per configured playlist
        "%s=%s" % [playlist, std.join(",", $.config.playlists[playlist])]
        for playlist in std.objectFields($.config.playlists)
      ],
      volumes=[
        {name: "credentials", secret: {secretName: "credentials"}}
      ],
      volumeMounts=[
        {mountPath: "/etc/creds", name: "credentials"},
      ]
    ),

    // chat_archiver records twitch chat messages and merges them with records from other nodes.
    if $.config.enabled.chat_archiver then $.deployment(
      "chat-archiver",
      args=[
        $.config.chat_archiver.user,
        "/etc/creds/chat_token.txt",
      ] + $.config.chat_archiver.channels + [
        "--name", $.config.localhost,
        "--metrics-port", "80"
      ],
      volumes=[
        {name: "credentials", secret: {secretName: "credentials"}}
      ],
      volumeMounts=[
        {mountPath: "/etc/creds", name: "credentials"},
      ]
    ),

    // Normally nginx would be responsible for proxying requests to different services,
    // but in k8s we can use Ingress to do that. However nginx is still needed to serve
    // static content - segments as well as thrimbletrimmer.
    if $.config.enabled.nginx then $.deployment("nginx", env=[
      {name: "THRIMBLETRIMMER", value: "true"},
      {name: "SEGMENTS", value: "/mnt"},
    ]),

    // postgres statefulset
    if $.config.enabled.postgres then $.statefulset(
      "postgres",
      args=if $.config.db_standby then ["/standby_setup.sh"] else [],
      env=[
        {name: "POSTGRES_USER", value: $.config.db_super_user},
        {name: "POSTGRES_PASSWORD", value: $.config.db_super_password},
        {name: "POSTGRES_DB", value: $.config.db_args.dbname},
        {name: "PGDATA", value: "/mnt/database"},
        {name: "WUBLOADER_USER", value: $.config.db_args.user},
        {name: "WUBLOADER_PASSWORD", value: $.config.db_args.password},
        {name: "REPLICATION_USER", value: $.config.db_replication_user},
        {name: "REPLICATION_PASSWORD", value: $.config.db_replication_password},
        {name: "READONLY_USER", value: $.config.db_readonly_user},
        {name: "READONLY_PASSWORD", value: $.config.db_readonly_password},
        {name: "MASTER_NODE", value: $.config.db_args.host},
      ]
    ),

    // Services for all deployments
    if $.config.enabled.downloader then $.service("downloader"),
    if $.config.enabled.backfiller then $.service("backfiller"),
    if $.config.enabled.nginx then $.service("nginx"),
    if $.config.enabled.restreamer then $.service("restreamer"),
    if $.config.enabled.segment_coverage then $.service("segment-coverage"),
    if $.config.enabled.thrimshim then $.service("thrimshim"),
    if $.config.enabled.cutter then $.service("cutter"),
    if $.config.enabled.playlist_manager then $.service("playlist-manager"),
    if $.config.enabled.sheetsync then $.service("sheetsync"),
    if $.config.enabled.postgres then $.service("postgres"),
    if $.config.enabled.chat_archiver then $.service("chat-archiver"),

    // Secret for credentials
    {
      apiVersion: "v1",
      kind: "Secret",
      metadata: {
        namespace: "wubloader",
        name: "credentials",
        labels: {app: "wubloader"}
      },
      type: "Opaque",
      stringData: {
        "cutter_creds.json": std.toString($.config.cutter_creds),
        "sheetsync_creds.json": std.toString($.config.sheetsync_creds),
        "chat_token.txt": $.config.chat_archiver.token,
        // Omit the downloader token entirely when none is configured,
        // rather than storing the literal string "null".
        [if $.config.downloader_creds_file != null then "downloader_token.txt"]: std.toString($.config.downloader_creds_file),
      },
    },

    // PV manifest for segments
    {
      apiVersion: "v1",
      kind: "PersistentVolume",
      metadata: {
        namespace: "wubloader",
        name: "segments",
        labels: {app: "wubloader"},
      },
      spec: {
        accessModes: ["ReadWriteMany"],
        capacity: {
          storage: $.config.nfs_capacity
        },
        mountOptions: $.config.nfs_mount_options,
        nfs: {
          server: $.config.nfs_server,
          path: $.config.nfs_path,
          readOnly: false
        },
        persistentVolumeReclaimPolicy: "Retain",
        volumeMode: "Filesystem"
      },
    },

    // PVC manifest for segments
    {
      apiVersion: "v1",
      kind: "PersistentVolumeClaim",
      metadata: {
        namespace: "wubloader",
        name: "segments",
        labels: {app: "wubloader"},
      },
      spec: {
        accessModes: ["ReadWriteMany"],
        resources: {
          requests: {
            storage: $.config.nfs_capacity
          },
        },
        // Empty storage class plus explicit volumeName binds this claim to the PV above
        storageClassName: "",
        volumeName: "segments"
      },
    },

    // Ingress to direct requests to the correct services.
    {
      kind: "Ingress",
      apiVersion: "networking.k8s.io/v1",
      metadata: {
        namespace: "wubloader",
        name: "wubloader",
        labels: {app: "wubloader"} + $.config.ingress_labels,
      },
      spec: {
        ingressClassName: $.config.ingress_class_name,
        rules: [
          {
            host: $.config.ingress_host,
            http: {
              // Helper functions for defining the path rules below.
              // Service names use '-' where component names use '_'.
              local rule(name, path, type) = {
                path: path,
                pathType: type,
                backend: {
                  service: {
                    name: std.strReplace(name, "_", "-"),
                    port: {
                      number: 80
                    },
                  },
                },
              },
              local metric_rule(name) = rule(name, "/metrics/%s" % name, "Exact"),
              paths: [
                // Map /metrics/NAME to each service
                metric_rule("downloader"),
                metric_rule("backfiller"),
                metric_rule("restreamer"),
                metric_rule("segment_coverage"),
                metric_rule("thrimshim"),
                metric_rule("cutter"),
                metric_rule("sheetsync"),
                metric_rule("playlist_manager"),
                metric_rule("chat_archiver"),
                // Map /segments and /thrimbletrimmer to the static content nginx
                rule("nginx", "/segments", "Prefix"),
                rule("nginx", "/thrimbletrimmer", "Prefix"),
                // Map /thrimshim to the thrimshim service
                rule("thrimshim", "/thrimshim", "Prefix"),
                // Map everything else to restreamer
                rule("restreamer", "/", "Prefix"),
              ],
            },
          },
        ],
        // Only emit a tls section when ingress_tls is set; secretName is only
        // included if config.ingress_secret_name has been defined.
        [if $.config.ingress_tls then 'tls']: [
          {
            hosts: [
              $.config.ingress_host,
            ],
            [if "ingress_secret_name" in $.config then 'secretName']: $.config.ingress_secret_name,
          },
        ],
      },
    },

  ],

}