From b7b623ac4db6d4a8c99c0822ff7ccc74de5a3875 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 18 Nov 2020 02:08:20 +0530 Subject: [PATCH] Detect when file was already downloaded --- README.md | 4 +++- youtube_dlc/YoutubeDL.py | 7 +++++-- youtube_dlc/__init__.py | 1 + youtube_dlc/downloader/common.py | 4 ++-- youtube_dlc/options.py | 6 +++++- youtube_dlc/postprocessor/sponskrub.py | 21 +++++++++++++-------- 6 files changed, 29 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2e025c2c4..da50a771a 100644 --- a/README.md +++ b/README.md @@ -497,7 +497,9 @@ I will add some memorable short links to the binaries so you can download them e with the data available in SponsorBlock API (Youtube only) --sponskrub-cut Cut out the sponsor sections instead of - simply marking them (Experimental) + simply marking them + --sponskrub-force Run sponskrub even if the video was + already downloaded. Use with caution --sponskrub-location Location of the sponskrub binary; either the path to the binary or its containing directory diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index dd55ba0f2..3a5bbb02b 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -2002,13 +2002,16 @@ class YoutubeDL(object): if not ensure_dir_exists(fname): return downloaded.append(fname) - partial_success = dl(fname, new_info) + partial_success, real_download = dl(fname, new_info) success = success and partial_success info_dict['__postprocessors'] = postprocessors info_dict['__files_to_merge'] = downloaded + # Even if there were no downloads, it is being merged only now + info_dict['__real_download'] = True else: # Just a single file - success = dl(filename, info_dict) + success, real_download = dl(filename, info_dict) + info_dict['__real_download'] = real_download except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index 88860b515..a5592a7cb 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -313,6 +313,7 @@ def _real_main(argv=None): 'path': opts.sponskrub_path, 'args': opts.sponskrub_args, 'cut': opts.sponskrub_cut, + 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, }) # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py index 7d303be1c..a0acb6556 100644 --- a/youtube_dlc/downloader/common.py +++ b/youtube_dlc/downloader/common.py @@ -351,7 +351,7 @@ class FileDownloader(object): 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), }) - return True + return True, False if subtitle is False: min_sleep_interval = self.params.get('sleep_interval') @@ -372,7 +372,7 @@ class FileDownloader(object): '[download] Sleeping %s seconds...' % ( sleep_interval_sub)) time.sleep(sleep_interval_sub) - return self.real_download(filename, info_dict) + return self.real_download(filename, info_dict), True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index e3a0c821d..f3b243050 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -890,7 +890,11 @@ def parseOpts(overrideArguments=None): extractor.add_option( '--sponskrub-cut', default=False, action='store_true', dest='sponskrub_cut', - help='Cut out the sponsor sections instead of simply marking them (Experimental)') + help='Cut out the sponsor sections instead of simply marking them') + extractor.add_option( + '--sponskrub-force', default=False, + action='store_true', dest='sponskrub_force', + help='Run sponskrub even if the video was already downloaded') extractor.add_option( '--sponskrub-location', metavar='PATH', dest='sponskrub_path', default='', diff --git a/youtube_dlc/postprocessor/sponskrub.py b/youtube_dlc/postprocessor/sponskrub.py index 1d7663a26..8ef612050 100644 --- a/youtube_dlc/postprocessor/sponskrub.py +++ b/youtube_dlc/postprocessor/sponskrub.py @@ -17,8 +17,9 @@ class SponSkrubPP(PostProcessor): _def_args = [] _exe_name = 'sponskrub' - def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False): + def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False): PostProcessor.__init__(self, downloader) + self.force = force self.cutout = cut self.args = ['-chapter'] if not cut else [] self.args += self._def_args if args is None else compat_shlex_split(args) @@ -42,15 +43,19 @@ class SponSkrubPP(PostProcessor): return [], information if information['extractor_key'].lower() != 'youtube': - self._downloader.to_screen('[sponskrub] Skipping SponSkrub since it is not a YouTube video') + self._downloader.to_screen('[sponskrub] Skipping sponskrub since it is not a YouTube video') + return [], information + if self.cutout and not self.force and not information.get('__real_download', False): + self._downloader.to_screen( + '[sponskrub] Skipping sponskrub since the video was already downloaded. ' + 'Use --sponskrub-force to run sponskrub anyway') return [], information self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark')) if self.cutout: - self._downloader.to_screen( - 'WARNING: The sponsor segments are cut out from the video based on timestamp. ' - 'This will cause the subtitles to go out of sync. ' - 'Also, if run multiple times, unintended parts of the video could be cut out.') + self._downloader.to_screen('WARNING: Cutting out sponsor segments will cause the subtitles to go out of sync.') + if not information.get('__real_download', False): + self._downloader.to_screen('WARNING: If sponskrub is run multiple times, unintended parts of the video could be cut out.') filename = information['filepath'] temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1] @@ -64,7 +69,7 @@ class SponSkrubPP(PostProcessor): cmd = [encodeArgument(i) for i in cmd] if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] SponSkrub command line: %s' % shell_quote(cmd)) + self._downloader.to_screen('[debug] sponskrub command line: %s' % shell_quote(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate() @@ -75,7 +80,7 @@ class SponSkrubPP(PostProcessor): elif p.returncode != 3: # error code 3 means there was no info about the video stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] - raise PostProcessingError(msg if msg else 'Sponskrub failed with error code %s!' % p.returncode) + raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s!' % p.returncode) else: self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database') return [], information