diff --git a/merge_v1.py b/merge_v1.py
index af6acd7..fbc757a 100644
--- a/merge_v1.py
+++ b/merge_v1.py
@@ -87,14 +87,25 @@ def load(file):
 
 
 def main(*files):
+    out = False
+    if files and files[0] == "--out":
+        files = files[1:]
+        out = True
     batches = [load(file) for file in files]
     result = batches[0]
     start = time.monotonic()
     for batch in batches[1:]:
         result = merge_messages(result, batch)
     interval = time.monotonic() - start
-    hash = hashlib.sha256(json.dumps(result).encode()).hexdigest()
-    print(f"Merged {len(batches)} batches in {interval:.3f}s to hash {hash}")
+    # v1 does not guarantee time-ordered messages, whereas v2 orders them by default,
+    # so sort here; otherwise the v1 and v2 outputs cannot be compared directly.
+    # The sort runs after `interval` is taken, so it is not counted as execution time.
+    result.sort(key=lambda o: o["time"])
+    if out:
+        print(json.dumps(result))
+    else:
+        hash = hashlib.sha256(json.dumps(result).encode()).hexdigest()
+        print(f"Merged {len(batches)} batches in {interval:.3f}s to hash {hash}")
 
 if __name__ == '__main__':
     import sys
diff --git a/merge_v2.py b/merge_v2.py
index a2d0e69..2751d92 100644
--- a/merge_v2.py
+++ b/merge_v2.py
@@ -88,14 +88,21 @@ def load(file):
 
 
 def main(*files):
+    out = False
+    if files and files[0] == "--out":
+        files = files[1:]
+        out = True
     batches = [load(file) for file in files]
     result = batches[0]
     start = time.monotonic()
     for batch in batches[1:]:
         result = merge_messages(result, batch)
     interval = time.monotonic() - start
-    hash = hashlib.sha256(json.dumps(result).encode()).hexdigest()
-    print(f"Merged {len(batches)} batches in {interval:.3f}s to hash {hash}")
+    if out:
+        print(json.dumps(result))
+    else:
+        hash = hashlib.sha256(json.dumps(result).encode()).hexdigest()
+        print(f"Merged {len(batches)} batches in {interval:.3f}s to hash {hash}")
 
 if __name__ == '__main__':
     import sys