From 066d10f94a5c17cd3d6ca85c4d894b0f28c839fe Mon Sep 17 00:00:00 2001
From: Mike Lang <mikelang3000@gmail.com>
Date: Mon, 27 May 2024 07:37:42 +1000
Subject: [PATCH] Full cut: Support video transitions

We support all preset transitions in the xfade filter,
as well as a handful of "custom" ones we define.

For audio, we only support a cross-fade. We may want to support J and L audio cuts (switch audio
before/after the transition) later.
---
 common/common/segments.py     | 151 +++++++++++++++++++++++++++++++---
 cutter/cutter/main.py         |   2 +-
 restreamer/restreamer/main.py |  30 ++++++-
 3 files changed, 168 insertions(+), 15 deletions(-)

diff --git a/common/common/segments.py b/common/common/segments.py
index fb02962..0f068c8 100644
--- a/common/common/segments.py
+++ b/common/common/segments.py
@@ -24,6 +24,91 @@ from .stats import timed
 from .fixts import FixTS
 
 
+# This is the set of transition names from the ffmpeg xfade filter that we allow.
+# This is mainly here to prevent someone putting in arbitrary strings and causing weird problems.
+KNOWN_XFADE_TRANSITIONS = [
+	"fade",
+	"wipeleft",
+	"wiperight",
+	"wipeup",
+	"wipedown",
+	"slideleft",
+	"slideright",
+	"slideup",
+	"slidedown",
+	"circlecrop",
+	"rectcrop",
+	"distance",
+	"fadeblack",
+	"fadewhite",
+	"radial",
+	"smoothleft",
+	"smoothright",
+	"smoothup",
+	"smoothdown",
+	"circleopen",
+	"circleclose",
+	"vertopen",
+	"vertclose",
+	"horzopen",
+	"horzclose",
+	"dissolve",
+	"pixelize",
+	"diagtl",
+	"diagtr",
+	"diagbl",
+	"diagbr",
+	"hlslice",
+	"hrslice",
+	"vuslice",
+	"vdslice",
+	"hblur",
+	"fadegrays",
+	"wipetl",
+	"wipetr",
+	"wipebl",
+	"wipebr",
+	"squeezeh",
+	"squeezev",
+	"zoomin",
+	"fadefast",
+	"fadeslow",
+	"hlwind",
+	"hrwind",
+	"vuwind",
+	"vdwind",
+	"coverleft",
+	"coverright",
+	"coverup",
+	"coverdown",
+	"revealleft",
+	"revealright",
+	"revealup",
+	"revealdown",
+]
+
+# These are custom transitions implemented using xfade's custom transition support.
+# It maps from name to the "expr" value to use.
+# In these expressions:
+#  X and Y are pixel coordinates
+#  A and B are the old and new video's pixel values
+#  W and H are screen width and height
+#  P is a "progress" number that runs from 1 down to 0 over the course of the transition
+CUSTOM_XFADE_TRANSITIONS = {
+	# A clock wipe is a 360 degree clockwise sweep around the center of the screen, starting at the top.
+	# It is intended to mimic an analog clock and insinuate a passing of time.
+	# It is implemented by calculating the angle of the point off a center line (using atan2())
+	# then using the old video (A) while that angle (normalized to 0-1) is still less than P.
+	"clockwipe": "if(lt((1-atan2(W/2-X,Y-H/2)/PI) / 2, P), A, B)",
+	# The classic star wipe is an expanding 5-pointed star from the center.
+	# It's mostly a meme.
+	# It is implemented by converting to polar coordinates (distance and angle off center),
+	# then comparing distance to a star formula derived from here: https://math.stackexchange.com/questions/4293250/how-to-write-a-polar-equation-for-a-five-pointed-star
+	# Made by SenseAmidstMadness.
+	"starwipe": "if(lt(sqrt(pow(X-W/2,2)+pow(Y-H/2,2))/sqrt(pow(W/2,2)+pow(H/2,2)),pow((1-P),2)*(0.75)*1/cos((2*asin(cos(5*(atan2(Y-H/2,X-W/2)+PI/2)))+PI*3)/(10))), B, A)",
+}
+
+
 def unpadded_b64_decode(s):
 	"""Decode base64-encoded string that has had its padding removed.
 	Note it takes a unicode and returns a bytes."""
@@ -654,13 +739,18 @@ def feed_input(segments, pipe):
 	cut_type=lambda _, segment_ranges, ranges, encode_args, stream=False: ("full-streamed" if stream else "full-buffered"),
 	normalize=lambda _, segment_ranges, ranges, *a, **k: range_total(ranges),
 )
-def full_cut_segments(segment_ranges, ranges, encode_args, stream=False):
+def full_cut_segments(segment_ranges, ranges, transitions, encode_args, stream=False):
 	"""If stream=true, assume encode_args gives a streamable format,
 	and begin returning output immediately instead of waiting for ffmpeg to finish
 	and buffering to disk."""
 
-	# for now, hard-code no transitions
-	transitions = [None] * (len(ranges) - 1)
+	# validate input lengths match up
+	if not (len(segment_ranges) == len(ranges) == len(transitions) + 1):
+		raise ValueError("Full cut input length mismatch: {} segment ranges, {} time ranges, {} transitions".format(
+			len(segment_ranges),
+			len(ranges),
+			len(transitions),
+		))
 
 	inputs = []
 	for segments, (start, end) in zip(segment_ranges, ranges):
@@ -677,24 +767,59 @@ def full_cut_segments(segment_ranges, ranges, encode_args, stream=False):
 		inputs.append((segments, args))
 
 	filters = []
-	# with no transitions, the output stream is just the first input stream
+	# with no additional ranges, the output stream is just the first input stream
 	output_video_stream = "0:v"
 	output_audio_stream = "0:a"
-	for i, transition in enumerate(transitions):
+	for i, (transition, prev_range) in enumerate(zip(transitions, ranges)):
 		# combine the current output stream with the next input stream
-		input_streams = [
-			output_video_stream, output_audio_stream,
-			f"{i+1}:v", f"{i+1}:a"
-		]
-		input_streams = "".join(f"[{stream}]" for stream in input_streams)
+		prev_video_stream = output_video_stream
+		prev_audio_stream = output_audio_stream
+		next_video_stream = f"{i+1}:v"
+		next_audio_stream = f"{i+1}:a"
+
 		# set new output streams
 		output_video_stream = f"v{i}"
 		output_audio_stream = f"a{i}"
-		outputs = f"[{output_video_stream}][{output_audio_stream}]"
+
+		# small helper for dealing with filter formatting
+		def add_filter(name, inputs, outputs, **kwargs):
+			inputs = "".join(f"[{stream}]" for stream in inputs)
+			outputs = "".join(f"[{stream}]" for stream in outputs)
+			kwargs = ":".join(f"{k}={v}" for k, v in kwargs.items())
+			filters.append(f"{inputs}{name}={kwargs}{outputs}")
+
 		if transition is None:
-			filters.append(f"{input_streams}concat=n=2:v=1:a=1{outputs}")
+			input_streams = [
+				prev_video_stream,
+				prev_audio_stream,
+				next_video_stream,
+				next_audio_stream,
+			]
+			output_streams = [output_video_stream, output_audio_stream]
+			add_filter("concat", input_streams, output_streams, n=2, v=1, a=1)
 		else:
-			raise NotImplementedError
+			video_type, duration = transition
+
+			# transition should start DURATION seconds before prev_range ends, eg. if prev_range is
+			# 60s long and duration is 2s, we start at 58s. NOTE(review): xfade's offset is relative to
+			# its first input, which for i > 0 is the combined stream so far, not prev_range - confirm.
+			prev_length = (prev_range[1] - prev_range[0]).total_seconds()
+			offset = prev_length - duration
+			kwargs = {
+				"duration": duration,
+				"offset": offset,
+			}
+			if video_type in CUSTOM_XFADE_TRANSITIONS:
+				kwargs["transition"] = "custom"
+				kwargs["expr"] = f"'{CUSTOM_XFADE_TRANSITIONS[video_type]}'" # wrap in '' for quoting
+			elif video_type in KNOWN_XFADE_TRANSITIONS:
+				kwargs["transition"] = video_type
+			else:
+				raise ValueError(f"Unknown video transition type: {video_type}")
+			add_filter("xfade", [prev_video_stream, next_video_stream], [output_video_stream], **kwargs)
+
+			# audio cross-fade across the same period
+			add_filter("acrossfade", [prev_audio_stream, next_audio_stream], [output_audio_stream], duration=duration)
 
 	if stream:
 		# When streaming, we can just use a pipe
diff --git a/cutter/cutter/main.py b/cutter/cutter/main.py
index 6b908bf..887a4c6 100644
--- a/cutter/cutter/main.py
+++ b/cutter/cutter/main.py
@@ -419,7 +419,7 @@ class Cutter(object):
 						upload_backend.encoding_settings,
 					))
 					cut = full_cut_segments(
-						job.segment_ranges, job.video_ranges,
+						job.segment_ranges, job.video_ranges, job.video_transitions,
 						upload_backend.encoding_settings, stream=upload_backend.encoding_streamable,
 					)
 
diff --git a/restreamer/restreamer/main.py b/restreamer/restreamer/main.py
index c982824..50aed08 100644
--- a/restreamer/restreamer/main.py
+++ b/restreamer/restreamer/main.py
@@ -332,6 +332,14 @@ def cut(channel, quality):
 			This option may be given multiple times.
 			The final video will consist of all the ranges cut back to back,
 			in the order given, with hard cuts between each range.
+		transition: A pair "TYPE,DURATION", or empty string "".
+			TYPE is a transition identifier, see common.segments for valid values.
+			DURATION is a float number of seconds for the transition to last.
+			Empty string indicates a hard cut.
+			This option may be given multiple times, with each time applying to the transition
+			between the next pair of ranges. It may be given a number of times up to 1 less
+			than the number of range args. If given fewer times than that (or not at all),
+			remaining ranges default to a hard cut.
 		allow_holes: Optional, default false. If false, errors out with a 406 Not Acceptable
 			if any holes are detected, rather than producing a video with missing parts.
 			Set to true by passing "true" (case insensitive).
@@ -369,6 +377,20 @@ def cut(channel, quality):
 		if end <= start:
 			return "Ends must be after starts", 400
 
+	transitions = []
+	for part in request.args.getlist('transition'):
+		if part == "":
+			transitions.append(None)
+		else:
+			video_type, duration = part.split(",")
+			duration = float(duration)
+			transitions.append((video_type, duration))
+	if len(transitions) >= len(ranges):
+		return "Too many transitions", 400
+	# pad with None
+	transitions = transitions + [None] * (len(ranges) - 1 - len(transitions))
+	has_transitions = any(t is not None for t in transitions)
+
 	allow_holes = request.args.get('allow_holes', 'false').lower()
 	if allow_holes not in ["true", "false"]:
 		return "allow_holes must be one of: true, false", 400
@@ -389,10 +411,16 @@ def cut(channel, quality):
 
 	type = request.args.get('type', 'fast')
 	if type == 'rough':
+		if has_transitions:
+			return "Cannot do rough cut with transitions", 400
 		return Response(rough_cut_segments(segment_ranges, ranges), mimetype='video/MP2T')
 	elif type == 'fast':
+		if has_transitions:
+			return "Cannot do fast cut with transitions", 400
 		return Response(fast_cut_segments(segment_ranges, ranges), mimetype='video/MP2T')
 	elif type == 'smart':
+		if has_transitions:
+			return "Cannot do smart cut with transitions", 400
 		return Response(smart_cut_segments(segment_ranges, ranges), mimetype='video/MP2T')
 	elif type in ('mpegts', 'mp4'):
 		if type == 'mp4':
@@ -400,7 +428,7 @@ def cut(channel, quality):
 		# encode as high-quality, without wasting too much cpu on encoding
 		stream, muxer, mimetype = (True, 'mpegts', 'video/MP2T') if type == 'mpegts' else (False, 'mp4', 'video/mp4')
 		encoding_args = ['-c:v', 'libx264', '-preset', 'ultrafast', '-crf', '0', '-f', muxer]
-		return Response(full_cut_segments(segment_ranges, ranges, encoding_args, stream=stream), mimetype=mimetype)
+		return Response(full_cut_segments(segment_ranges, ranges, transitions, encoding_args, stream=stream), mimetype=mimetype)
 	else:
 		return "Unknown type {!r}".format(type), 400