From d8a9b5ddf0d8ed77a62f5dbbe8ff5785375915e0 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Fri, 19 Aug 2022 12:57:56 +1000 Subject: [PATCH] chat_archiver: Always sort json object keys to ensure canonical output --- chat_archiver/chat_archiver/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chat_archiver/chat_archiver/main.py b/chat_archiver/chat_archiver/main.py index e09b278..4a808ac 100644 --- a/chat_archiver/chat_archiver/main.py +++ b/chat_archiver/chat_archiver/main.py @@ -306,9 +306,9 @@ def write_batch(path, batch_time, messages, size_histogram=None): def format_batch(messages): # We need to take some care to have a consistent ordering and format here. # We use a "canonicalised JSON" format, which is really just whatever the python encoder does, - # with compact separators. + # with compact separators and sorted keys. messages = [ - (message, json.dumps(message, separators=(',', ':'))) + (message, json.dumps(message, separators=(',', ':'), sort_keys=True)) for message in messages ] # We sort by timestamp, then timestamp range, then if all else fails, lexiographically