From c50224415ce208b27250cfbfc5ab53e5f182e088 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Tue, 9 Aug 2022 18:23:15 +1000 Subject: [PATCH] more backfiller chat fixes fixup: more backfiller fixes Enable backfilling of chat logs --- backfiller/backfiller/main.py | 44 +++++++++++++++++++---------------- docker-compose.jsonnet | 4 +++- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index a5e6560..f0ac18b 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -21,7 +21,7 @@ import common from common import dateutil from common import database from common.requests import InstrumentedSession -from common.segments import list_segment_files +from common.segments import list_segment_files, unpadded_b64_decode # Wraps all requests in some metric collection requests = InstrumentedSession() @@ -133,8 +133,9 @@ def get_remote_segment(base_dir, node, channel, quality, hour, missing_segment, dir_name = os.path.dirname(path) if quality == "chat": # chat segment - _, filename_hash = os.path.basename(path).split('-', 1) - temp_name = "{}.{}.temp".format(os.path.basename(path), uuid4()) + _, filename_hash = os.path.splitext(os.path.basename(path))[0].split('-', 1) + filename_hash = unpadded_b64_decode(filename_hash) + temp_name = "{}.{}.temp".format(os.path.basename(path), uuid.uuid4()) else: # video segment date, duration, _ = os.path.basename(path).split('-', 2) @@ -486,23 +487,26 @@ class BackfillerWorker(object): pass continue - # test to see if file is a segment and get the segments start time - try: - segment = common.parse_segment_path(path) - except ValueError as e: - self.logger.warning('File {} invalid: {}'.format(path, e)) - continue - - # Ignore temp segments as they may go away by the time we fetch them. - if segment.type == "temp": - self.logger.debug('Skipping {} as it is a temp segment'.format(path)) - continue - - # to avoid getting in the downloader's way ignore segments - # less than recent_cutoff old - if datetime.datetime.utcnow() - segment.start < datetime.timedelta(seconds=self.recent_cutoff): - self.logger.debug('Skipping {} as too recent'.format(path)) - continue + # For chat archives, just download whatever is there. + # Otherwise, do some basic checks. + if quality != 'chat': + # test to see if file is a segment and get the segments start time + try: + segment = common.parse_segment_path(path) + except ValueError as e: + self.logger.warning('File {} invalid: {}'.format(path, e)) + continue + + # Ignore temp segments as they may go away by the time we fetch them. + if segment.type == "temp": + self.logger.debug('Skipping {} as it is a temp segment'.format(path)) + continue + + # to avoid getting in the downloader's way ignore segments + # less than recent_cutoff old + if datetime.datetime.utcnow() - segment.start < datetime.timedelta(seconds=self.recent_cutoff): + self.logger.debug('Skipping {} as too recent'.format(path)) + continue # start segment as soon as a pool slot opens up, then track it in workers workers.append(pool.spawn( diff --git a/docker-compose.jsonnet b/docker-compose.jsonnet index ebb5aa9..0778d69 100644 --- a/docker-compose.jsonnet +++ b/docker-compose.jsonnet @@ -178,6 +178,8 @@ // Twitch user to log in as and path to oauth token user: "dbvideostriketeam", token_path: "./chat_token.txt", + // Whether to enable backfilling of chat archives to this node (if backfiller enabled) + backfill: true, }, // Extra options to pass via environment variables, @@ -253,7 +255,7 @@ command: $.clean_channels + [ "--base-dir", "/mnt", - "--qualities", std.join(",", $.qualities), + "--qualities", std.join(",", $.qualities + (if $.chat_archiver.backfill then ["chat"] else [])), "--static-nodes", std.join(",", $.peers), "--backdoor-port", std.toString($.backdoor_port), "--node-database", $.db_connect,