From ed2395d08e31d2d3cf0720aff393afffbc006d34 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 20 Oct 2019 02:22:33 +1100 Subject: [PATCH 1/8] cutter: Fix typo that prevented backends from being configured --- cutter/cutter/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cutter/cutter/main.py b/cutter/cutter/main.py index 7a38379..46d9c31 100644 --- a/cutter/cutter/main.py +++ b/cutter/cutter/main.py @@ -618,12 +618,12 @@ def main( backend_type = backend_config.pop('type') no_transcode_check = backend_config.pop('no_transcode_check', False) cut_type = backend_config.pop('cut_type', 'fast') - if type == 'youtube': + if backend_type == 'youtube': backend_type = Youtube - elif type == 'local': + elif backend_type == 'local': backend_type = Local else: - raise ValueError("Unknown upload backend type: {!r}".format(type)) + raise ValueError("Unknown upload backend type: {!r}".format(backend_type)) backend = backend_type(credentials, **backend_config) if cut_type == 'fast': # mark for fast cut by clearing encoding settings From 12decf015e376ee780a335efb232b07c50044eb6 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Sun, 20 Oct 2019 02:35:52 +1100 Subject: [PATCH 2/8] Fix multiple typos and mistakes with full cuts --- common/common/__init__.py | 2 +- common/common/segments.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/common/common/__init__.py b/common/common/__init__.py index 3d8edb8..85529ba 100644 --- a/common/common/__init__.py +++ b/common/common/__init__.py @@ -7,7 +7,7 @@ import errno import os import random -from .segments import get_best_segments, cut_segments, parse_segment_path, SegmentInfo +from .segments import get_best_segments, fast_cut_segments, full_cut_segments, parse_segment_path, SegmentInfo from .stats import timed, PromLogCountsHandler, install_stacksampler diff --git a/common/common/segments.py b/common/common/segments.py index 950c212..ef115a6 100644 --- a/common/common/segments.py +++ b/common/common/segments.py @@ -286,18 +286,19 @@ def ffmpeg_cut_segment(segment, cut_start=None, cut_end=None): return subprocess.Popen(args, stdout=subprocess.PIPE) -def ffmpeg_cut_stdin(cut_start, cut_end, encode_args): +def ffmpeg_cut_stdin(cut_start, duration, encode_args): """Return a Popen object which is ffmpeg cutting from stdin. This is used when doing a full cut.""" args = [ 'ffmpeg', '-hide_banner', '-loglevel', 'fatal', # suppress noisy output - '-i', '-' + '-i', '-', '-ss', cut_start, - '-to', cut_end, + '-t', duration, ] + list(encode_args) + [ '-', # output to stdout ] + args = map(str, args) logging.info("Running full cut with args: {}".format(" ".join(args))) return subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -400,13 +401,13 @@ def feed_input(segments, pipe): def full_cut_segments(segments, start, end, encode_args): # how far into the first segment to begin cut_start = max(0, (start - segments[0].start).total_seconds()) - # how much of final segment should be cut off - cut_end = max(0, (segments[-1].end - end).total_seconds()) + # duration + duration = (end - start).total_seconds() ffmpeg = None input_feeder = None try: - ffmpeg = ffmpeg_cut_stdin(cut_start, cut_end, encode_args) + ffmpeg = ffmpeg_cut_stdin(cut_start, duration, encode_args) input_feeder = gevent.spawn(feed_input, segments, ffmpeg.stdin) # stream the output until it is closed for chunk in read_chunks(ffmpeg.stdout): From ba53172cbc8beee9d82f4b316b6f08d362eb815e Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Wed, 23 Oct 2019 22:25:29 +1100 Subject: [PATCH 3/8] cutter: Get video link from backend instead of hard-coding youtube --- cutter/cutter/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cutter/cutter/main.py b/cutter/cutter/main.py index 46d9c31..7943ac3 100644 --- a/cutter/cutter/main.py +++ b/cutter/cutter/main.py @@ -350,7 +350,7 @@ class Cutter(object): # from requests.post() are not recoverable. try: - video_id = upload_backend.upload_video( + video_id, video_link = upload_backend.upload_video( title=( "{} - {}".format(self.title_header, job.video_title) if self.title_header else job.video_title @@ -418,8 +418,7 @@ class Cutter(object): # Success! Set TRANSCODING or DONE and clear any previous error. success_state = 'TRANSCODING' if upload_backend.needs_transcode else 'DONE' - link = "https://youtu.be/{}".format(video_id) - if not set_row(state=success_state, video_id=video_id, video_link=link, error=None): + if not set_row(state=success_state, video_id=video_id, video_link=video_link, error=None): # This will result in it being stuck in FINALIZING, and an operator will need to go # confirm it was really uploaded. raise JobConsistencyError( @@ -427,7 +426,7 @@ class Cutter(object): .format(job.id, self.name, success_state) ) - self.logger.info("Successfully cut and uploaded job {} as {}".format(format_job(job), link)) + self.logger.info("Successfully cut and uploaded job {} as {}".format(format_job(job), video_link)) videos_uploaded.labels(video_channel=job.video_channel, video_quality=job.video_quality, upload_location=job.upload_location).inc() From b0a71fd9f113b082ffd277a17f12de802fac52b7 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 24 Oct 2019 03:44:54 +1100 Subject: [PATCH 4/8] cutter: Fix mistake in check_candidate error handling --- cutter/cutter/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutter/cutter/main.py b/cutter/cutter/main.py index 7943ac3..dc8fa76 100644 --- a/cutter/cutter/main.py +++ b/cutter/cutter/main.py @@ -146,7 +146,7 @@ class Cutter(object): UPDATE events SET error = %s WHERE id = %s AND state = 'EDITED' AND error IS NULL - """, id=candidate.id, error=error) + """, candidate.id, error) except Exception: self.logger.exception("Failed to set error for candidate {}, ignoring".format(format_job(candidate))) self.refresh_conn() From 3fbbe59b00d21eca6b197f9d778352558f9f3d60 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 24 Oct 2019 03:45:20 +1100 Subject: [PATCH 5/8] cutter Local backend: Fix typo and file extension when full cutting --- cutter/cutter/upload_backends.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cutter/cutter/upload_backends.py b/cutter/cutter/upload_backends.py index bd3a056..bb59562 100644 --- a/cutter/cutter/upload_backends.py +++ b/cutter/cutter/upload_backends.py @@ -172,10 +172,12 @@ class Local(UploadBackend): video_id = uuid.uuid4() # make title safe by removing offending characters, replacing with '-' title = re.sub('[^A-Za-z0-9_]', '-', title) - filename = '{}-{}.ts'.format(title, video_id) # TODO with re-encoding, this ext must change + # If fast cut enabled, use .ts, otherwise use .mp4 + ext = 'ts' if self.encoding_settings is None else 'mp4' + filename = '{}-{}.{}'.format(title, video_id, ext) filepath = os.path.join(self.path, filename) if self.write_info: - with open(os.path.join(self.path, '{}-{}.json'.format(title, video_id))) as f: + with open(os.path.join(self.path, '{}-{}.json'.format(title, video_id)), 'w') as f: f.write(json.dumps({ 'title': title, 'description': description, From 3a9543a4b50837768c85a4f8dc6b25b793ce1327 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 24 Oct 2019 04:00:13 +1100 Subject: [PATCH 6/8] Suppress less ffmpeg output when cutting The "fatal" level was causing some useful errors to be suppressed. --- common/common/segments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/common/segments.py b/common/common/segments.py index ef115a6..234ca58 100644 --- a/common/common/segments.py +++ b/common/common/segments.py @@ -262,7 +262,7 @@ def ffmpeg_cut_segment(segment, cut_start=None, cut_end=None): """ args = [ 'ffmpeg', - '-hide_banner', '-loglevel', 'fatal', # suppress noisy output + '-hide_banner', '-loglevel', 'error', # suppress noisy output '-i', segment.path, ] # output from ffprobe is generally already sorted but let's be paranoid, @@ -291,7 +291,7 @@ def ffmpeg_cut_stdin(cut_start, duration, encode_args): This is used when doing a full cut.""" args = [ 'ffmpeg', - '-hide_banner', '-loglevel', 'fatal', # suppress noisy output + '-hide_banner', '-loglevel', 'error', # suppress noisy output '-i', '-', '-ss', cut_start, '-t', duration, From 80d829b83be87a2c20bce6e52c578f74c8c80579 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 24 Oct 2019 04:20:15 +1100 Subject: [PATCH 7/8] full cut: ffmpeg requires a seekable output file Most formats like mp4 require ffmpeg to make changes at the start of the file throughout writing. Unfortunately, this prevents us from streaming the upload as we cut it. Instead, we spool to a temporary file until ffmpeg exits, then upload that all at once. --- common/common/segments.py | 63 ++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/common/common/segments.py b/common/common/segments.py index 234ca58..9634d32 100644 --- a/common/common/segments.py +++ b/common/common/segments.py @@ -13,6 +13,7 @@ import shutil import sys from collections import namedtuple from contextlib import closing +from tempfile import TemporaryFile import gevent from gevent import subprocess @@ -286,9 +287,12 @@ def ffmpeg_cut_segment(segment, cut_start=None, cut_end=None): return subprocess.Popen(args, stdout=subprocess.PIPE) -def ffmpeg_cut_stdin(cut_start, duration, encode_args): +def ffmpeg_cut_stdin(output_file, cut_start, duration, encode_args): """Return a Popen object which is ffmpeg cutting from stdin. - This is used when doing a full cut.""" + This is used when doing a full cut. + Note the explicit output file object instead of using a pipe, + because most video formats require a seekable file. + """ args = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error', # suppress noisy output @@ -296,11 +300,18 @@ def ffmpeg_cut_stdin(cut_start, duration, encode_args): '-ss', cut_start, '-t', duration, ] + list(encode_args) + [ - '-', # output to stdout + # We want ffmpeg to write to our tempfile, which is its stdout. + # However, it assumes that '-' means the output is not seekable. + # We trick it into understanding that its stdout is seekable by + # telling it to write to the fd via its /proc/self filename. + '/proc/self/fd/1', + # But of course, that file "already exists", so we need to give it + # permission to "overwrite" it. + '-y', ] args = map(str, args) logging.info("Running full cut with args: {}".format(" ".join(args))) - return subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + return subprocess.Popen(args, stdin=subprocess.PIPE, stdout=output_file) def read_chunks(fileobj, chunk_size=16*1024): @@ -384,20 +395,6 @@ def fast_cut_segments(segments, start, end): yield chunk -def feed_input(segments, pipe): - """Write each segment's data into the given pipe in order. - This is used to provide input to ffmpeg in a full cut.""" - for segment in segments: - with open(segment.path) as f: - try: - shutil.copyfileobj(f, pipe) - except OSError as e: - # ignore EPIPE, as this just means the end cut meant we didn't need all input - if e.errno != errno.EPIPE: - raise - pipe.close() - - def full_cut_segments(segments, start, end, encode_args): # how far into the first segment to begin cut_start = max(0, (start - segments[0].start).total_seconds()) @@ -405,21 +402,33 @@ def full_cut_segments(segments, start, end, encode_args): duration = (end - start).total_seconds() ffmpeg = None - input_feeder = None try: - ffmpeg = ffmpeg_cut_stdin(cut_start, duration, encode_args) - input_feeder = gevent.spawn(feed_input, segments, ffmpeg.stdin) - # stream the output until it is closed - for chunk in read_chunks(ffmpeg.stdout): - yield chunk + # Most ffmpeg output formats require a seekable file. + # For the same reason, it's not safe to begin uploading until ffmpeg + # has finished. We create a temporary file for this. + tempfile = TemporaryFile() + ffmpeg = ffmpeg_cut_stdin(tempfile, cut_start, duration, encode_args) + + # stream the input + for segment in segments: + with open(segment.path) as f: + try: + shutil.copyfileobj(f, ffmpeg.stdin) + except OSError as e: + # ignore EPIPE, as this just means the end cut meant we didn't need all input + if e.errno != errno.EPIPE: + raise + ffmpeg.stdin.close() + # check if any errors occurred in input writing, or if ffmpeg exited non-success. if ffmpeg.wait() != 0: raise Exception("Error while streaming cut: ffmpeg exited {}".format(ffmpeg.returncode)) - input_feeder.get() # re-raise any errors from feed_input() + + # Now actually yield the resulting file + for chunk in read_chunks(tempfile): + yield chunk finally: # if something goes wrong, try to clean up ignoring errors - if input_feeder is not None: - input_feeder.kill() if ffmpeg is not None and ffmpeg.poll() is None: for action in (ffmpeg.kill, ffmpeg.stdin.close, ffmpeg.stdout.close): try: From 40458d9d7f590aba6c9b200b177f390ec08d7881 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 24 Oct 2019 04:21:46 +1100 Subject: [PATCH 8/8] local backend: Use original version of title in write_info instead of safe version --- cutter/cutter/upload_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cutter/cutter/upload_backends.py b/cutter/cutter/upload_backends.py index bb59562..7d8e38d 100644 --- a/cutter/cutter/upload_backends.py +++ b/cutter/cutter/upload_backends.py @@ -171,13 +171,13 @@ class Local(UploadBackend): def upload_video(self, title, description, tags, data): video_id = uuid.uuid4() # make title safe by removing offending characters, replacing with '-' - title = re.sub('[^A-Za-z0-9_]', '-', title) + safe_title = re.sub('[^A-Za-z0-9_]', '-', title) # If fast cut enabled, use .ts, otherwise use .mp4 ext = 'ts' if self.encoding_settings is None else 'mp4' - filename = '{}-{}.{}'.format(title, video_id, ext) + filename = '{}-{}.{}'.format(safe_title, video_id, ext) filepath = os.path.join(self.path, filename) if self.write_info: - with open(os.path.join(self.path, '{}-{}.json'.format(title, video_id)), 'w') as f: + with open(os.path.join(self.path, '{}-{}.json'.format(safe_title, video_id)), 'w') as f: f.write(json.dumps({ 'title': title, 'description': description,