From 21856c68aaf70074dc757285dc5b77b64fc7e577 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 12 Sep 2021 16:20:16 +1000 Subject: [PATCH] Fix all instances of file.write() for py3 In python 3, file.write() may do a partial write and returns the number of characters written. In order to not lose data, we need to wrap every instance of file.write() with our new common.writeall() wrapper that loops until the data is actually written. --- backfiller/backfiller/main.py | 2 +- common/common/__init__.py | 19 +++++++++++++++++++ cutter/cutter/upload_backends.py | 5 +++-- downloader/downloader/main.py | 2 +- segment_coverage/segment_coverage/main.py | 2 +- 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/backfiller/backfiller/main.py b/backfiller/backfiller/main.py index fe4cc2e..bc3fdbc 100644 --- a/backfiller/backfiller/main.py +++ b/backfiller/backfiller/main.py @@ -149,7 +149,7 @@ def get_remote_segment(base_dir, node, channel, quality, hour, missing_segment, with open(temp_path, 'wb') as f: for chunk in resp.iter_content(8192): - f.write(chunk) + common.writeall(f.write, chunk) hash.update(chunk) filename_hash = common.parse_segment_path(missing_segment).hash diff --git a/common/common/__init__.py b/common/common/__init__.py index 47bf044..299db19 100644 --- a/common/common/__init__.py +++ b/common/common/__init__.py @@ -103,3 +103,22 @@ def jitter(interval): smooth out patterns and prevent everything from retrying at the same time. """ return interval * (0.9 + 0.2 * random.random()) + + +def writeall(write, value): + """Helper for writing a complete string to a file-like object. + Pass the write function and the value to write, and it will loop if needed to ensure + all data is written. + Works for both text and binary files, as long as you pass the right value type for + the write function. + """ + while value: + n = write(value) + if n is None: + # The write func doesn't return the amount written, assume it always writes everything + break + if n == 0: + # This would cause an infinite loop...blow up instead so it's clear what the problem is + raise Exception("Wrote 0 chars while calling {} with {}-char {}".format(write, len(value), type(value).__name__)) + # remove the first n chars and go again if we have anything left + value = value[n:] diff --git a/cutter/cutter/upload_backends.py b/cutter/cutter/upload_backends.py index 346eb58..4aa991f 100644 --- a/cutter/cutter/upload_backends.py +++ b/cutter/cutter/upload_backends.py @@ -6,6 +6,7 @@ import os import re import uuid +import common from common.googleapis import GoogleAPIClient @@ -242,14 +243,14 @@ class Local(UploadBackend): try: if self.write_info: with open(os.path.join(self.path, '{}-{}.json'.format(safe_title, video_id)), 'w') as f: - f.write(json.dumps({ + common.writeall(f.write, json.dumps({ 'title': title, 'description': description, 'tags': tags, }) + '\n') with open(filepath, 'wb') as f: for chunk in data: - f.write(chunk) + common.writeall(f.write, chunk) except (OSError, IOError) as e: # Because duplicate videos don't actually matter with this backend, # we consider all disk errors retryable. diff --git a/downloader/downloader/main.py b/downloader/downloader/main.py index 337d157..f11b34b 100644 --- a/downloader/downloader/main.py +++ b/downloader/downloader/main.py @@ -565,7 +565,7 @@ class SegmentGetter(object): # we may lose part of the last chunk even though we did receive it. # This is a small enough amount of data that we don't really care. for chunk in resp.iter_content(8192): - f.write(chunk) + common.writeall(f.write, chunk) hash.update(chunk) except Exception as e: if file_created: diff --git a/segment_coverage/segment_coverage/main.py b/segment_coverage/segment_coverage/main.py index 28523d3..b1f2273 100644 --- a/segment_coverage/segment_coverage/main.py +++ b/segment_coverage/segment_coverage/main.py @@ -232,7 +232,7 @@ class CoverageChecker(object): final_path = '{}_coverage.html'.format(path_prefix) common.ensure_directory(temp_path) with open(temp_path, 'w') as f: - f.write(html) + common.writeall(f.write, html) os.rename(temp_path, final_path) self.logger.info('Coverage page for {} created'.format(quality))