From a719e8df70a3d23ea59d32fe8bd4b92daa0e80c8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 15 Nov 2020 05:58:41 +0530 Subject: [PATCH] Sponskrub integration --- README.md | 11 ++++ youtube_dlc/__init__.py | 10 ++++ youtube_dlc/options.py | 21 +++++++ youtube_dlc/postprocessor/__init__.py | 2 + youtube_dlc/postprocessor/sponskrub.py | 81 ++++++++++++++++++++++++++ 5 files changed, 125 insertions(+) create mode 100644 youtube_dlc/postprocessor/sponskrub.py diff --git a/README.md b/README.md index f884ad067..2e025c2c4 100644 --- a/README.md +++ b/README.md @@ -492,6 +492,17 @@ I will add some memorable short links to the binaries so you can download them e --convert-subs FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc) +## SponSkrub Options (SponsorBlock) + --sponskrub Use sponskrub to mark sponsored sections + with the data available in SponsorBlock API + (Youtube only) + --sponskrub-cut Cut out the sponsor sections instead of + simply marking them (Experimental) + --sponskrub-location Location of the sponskrub binary; + either the path to the binary or its + containing directory + --sponskrub-args Give these arguments to sponskrub + ## Extractor Options: --ignore-dynamic-mpd Do not process dynamic DASH manifests diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index 105786bc0..88860b515 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -305,6 +305,16 @@ def _real_main(argv=None): # contents if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) + # This should be below all ffmpeg PP because it may cut parts out from the video + # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found + if opts.sponskrub is not False: + postprocessors.append({ + 'key': 'SponSkrub', + 'path': opts.sponskrub_path, + 'args': opts.sponskrub_args, + 'cut': opts.sponskrub_cut, + 'ignoreerror': opts.sponskrub is None, + }) # Please keep ExecAfterDownload towards 
the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 3c8a1305e..e3a0c821d 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -878,6 +878,27 @@ def parseOpts(overrideArguments=None): metavar='FORMAT', dest='convertsubtitles', default=None, help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)') + extractor = optparse.OptionGroup(parser, 'SponSkrub Options (SponsorBlock)') + extractor.add_option( + '--sponskrub', + action='store_true', dest='sponskrub', default=False, # should default be None instead? + help='Use sponskrub to mark sponsored sections with the data available in SponsorBlock API (Youtube only)') + extractor.add_option( + '--no-sponskrub', + action='store_false', dest='sponskrub', + help=optparse.SUPPRESS_HELP) + extractor.add_option( + '--sponskrub-cut', default=False, + action='store_true', dest='sponskrub_cut', + help='Cut out the sponsor sections instead of simply marking them (Experimental)') + extractor.add_option( + '--sponskrub-location', metavar='PATH', + dest='sponskrub_path', default='', + help='Location of the sponskrub binary; either the path to the binary or its containing directory.') + extractor.add_option( + '--sponskrub-args', dest='sponskrub_args', + help='Give these arguments to sponskrub') + extractor = optparse.OptionGroup(parser, 'Extractor Options') extractor.add_option( '--allow-dynamic-mpd', diff --git a/youtube_dlc/postprocessor/__init__.py b/youtube_dlc/postprocessor/__init__.py index 2c4702823..e160909a7 100644 --- a/youtube_dlc/postprocessor/__init__.py +++ b/youtube_dlc/postprocessor/__init__.py @@ -17,6 +17,7 @@ from .ffmpeg import ( from .xattrpp import XAttrMetadataPP from .execafterdownload import ExecAfterDownloadPP from .metadatafromtitle import MetadataFromTitlePP 
+from .sponskrub import SponSkrubPP
 
 
 def get_postprocessor(key):
@@ -38,5 +39,6 @@ __all__ = [
     'FFmpegVideoConvertorPP',
     'FFmpegVideoRemuxerPP',
     'MetadataFromTitlePP',
+    'SponSkrubPP',
     'XAttrMetadataPP',
 ]
diff --git a/youtube_dlc/postprocessor/sponskrub.py b/youtube_dlc/postprocessor/sponskrub.py
new file mode 100644
index 000000000..1d7663a26
--- /dev/null
+++ b/youtube_dlc/postprocessor/sponskrub.py
@@ -0,0 +1,120 @@
+from __future__ import unicode_literals
+import os
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_split
+from ..utils import (
+    check_executable,
+    encodeArgument,
+    shell_quote,
+    PostProcessingError,
+)
+
+
+class SponSkrubPP(PostProcessor):
+    """Run the external sponskrub binary on a downloaded YouTube file.
+
+    Sponsor sections are marked (sponskrub is given ``-chapter``) unless
+    ``cut`` is set, in which case they are removed from the file.
+    """
+
+    # Infix of the temporary output file name (see run())
+    _temp_ext = 'spons'
+    # Extra arguments used when the caller passes args=None
+    _def_args = []
+    # File name tried when ``path`` is empty or is not itself runnable
+    _exe_name = 'sponskrub'
+
+    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False):
+        """Locate the binary and build the argument list.
+
+        downloader:  forwarded to PostProcessor.__init__
+        path:        the sponskrub binary or the directory containing it;
+                     when empty, the bare name 'sponskrub' is tried
+        args:        extra command-line arguments as one shell-style string
+        ignoreerror: if true, a missing binary is tolerated and run()
+                     returns without doing anything
+        cut:         remove the sponsor sections instead of marking them
+
+        Raises PostProcessingError when the binary cannot be found and
+        ignoreerror is false.
+        """
+        PostProcessor.__init__(self, downloader)
+        self.cutout = cut
+        self.args = ['-chapter'] if not cut else []
+        self.args += self._def_args if args is None else compat_shlex_split(args)
+        self.path = self.get_exe(path)
+
+        if not ignoreerror and self.path is None:
+            if path:
+                raise PostProcessingError('sponskrub not found in "%s"' % path)
+            else:
+                # The command-line option that sets this path is --sponskrub-location
+                raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-location.')
+
+    def get_exe(self, path=''):
+        """Return a runnable sponskrub path, or None if none is found.
+
+        ``path`` is tried as the binary itself first; if it is empty or does
+        not run, ``path`` joined with the default file name is tried.
+        """
+        if not path or not check_executable(path, ['-h']):
+            path = os.path.join(path, self._exe_name)
+            if not check_executable(path, ['-h']):
+                return None
+        return path
+
+    def run(self, information):
+        """Mark or cut the sponsor sections of information['filepath'] in place.
+
+        The file is left untouched when no binary was found, when the
+        extractor is not YouTube, or when sponskrub exits with code 3.
+        Returns ([], information); raises PostProcessingError if sponskrub
+        exits with any code other than 0 or 3.
+        """
+        if self.path is None:
+            return [], information
+
+        if information['extractor_key'].lower() != 'youtube':
+            self._downloader.to_screen('[sponskrub] Skipping SponSkrub since it is not a YouTube video')
+            return [], information
+
+        self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
+        if self.cutout:
+            self._downloader.to_screen(
+                'WARNING: The sponsor segments are cut out from the video based on timestamp. '
+                'This will cause the subtitles to go out of sync. '
+                'Also, if run multiple times, unintended parts of the video could be cut out.')
+
+        filename = information['filepath']
+        # e.g. video.mkv -> video.mkv.spons.mkv; a leftover from an earlier run is discarded
+        temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1]
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+
+        # sponskrub [args] -- <video id> <input file> <output file>
+        cmd = [self.path]
+        if self.args:
+            cmd += self.args
+        cmd += ['--', information['id'], filename, temp_filename]
+        cmd = [encodeArgument(i) for i in cmd]
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] SponSkrub command line: %s' % shell_quote(cmd))
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+
+        if p.returncode == 0:
+            # Replace the original with sponskrub's output
+            os.remove(filename)
+            os.rename(temp_filename, filename)
+            self._downloader.to_screen('[sponskrub] Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
+        elif p.returncode != 3:  # error code 3 means there was no info about the video
+            # Surface the last line of sponskrub's stderr as the error message
+            stderr = stderr.decode('utf-8', 'replace')
+            msg = stderr.strip().split('\n')[-1]
+            raise PostProcessingError(msg if msg else 'Sponskrub failed with error code %s!' % p.returncode)
+        else:
+            self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database')
+        return [], information