From b7b623ac4db6d4a8c99c0822ff7ccc74de5a3875 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan@gmail.com>
Date: Wed, 18 Nov 2020 02:08:20 +0530
Subject: [PATCH] Detect when file was already downloaded

---
 README.md                              |  4 +++-
 youtube_dlc/YoutubeDL.py               |  7 +++++--
 youtube_dlc/__init__.py                |  1 +
 youtube_dlc/downloader/common.py       |  4 ++--
 youtube_dlc/options.py                 |  6 +++++-
 youtube_dlc/postprocessor/sponskrub.py | 21 +++++++++++++--------
 6 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 2e025c2c4..da50a771a 100644
--- a/README.md
+++ b/README.md
@@ -497,7 +497,9 @@ I will add some memorable short links to the binaries so you can download them e
                                      with the data available in SponsorBlock API
                                      (Youtube only)
     --sponskrub-cut                  Cut out the sponsor sections instead of
-                                     simply marking them (Experimental)
+                                     simply marking them
+    --sponskrub-force                Run sponskrub even if the video was
+                                     already downloaded. Use with caution
     --sponskrub-location             Location of the sponskrub binary;
                                      either the path to the binary or its
                                      containing directory
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index dd55ba0f2..3a5bbb02b 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -2002,13 +2002,16 @@ class YoutubeDL(object):
                             if not ensure_dir_exists(fname):
                                 return
                             downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
+                            partial_success, real_download = dl(fname, new_info)
                             success = success and partial_success
                         info_dict['__postprocessors'] = postprocessors
                         info_dict['__files_to_merge'] = downloaded
+                        # The merged file is only being created now, so count it as a real download even if every format was already on disk
+                        info_dict['__real_download'] = True
                 else:
                     # Just a single file
-                    success = dl(filename, info_dict)
+                    success, real_download = dl(filename, info_dict)
+                    info_dict['__real_download'] = real_download
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py
index 88860b515..a5592a7cb 100644
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -313,6 +313,7 @@ def _real_main(argv=None):
             'path': opts.sponskrub_path,
             'args': opts.sponskrub_args,
             'cut': opts.sponskrub_cut,
+            'force': opts.sponskrub_force,
             'ignoreerror': opts.sponskrub is None,
         })
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
index 7d303be1c..a0acb6556 100644
--- a/youtube_dlc/downloader/common.py
+++ b/youtube_dlc/downloader/common.py
@@ -351,7 +351,7 @@ class FileDownloader(object):
                     'status': 'finished',
                     'total_bytes': os.path.getsize(encodeFilename(filename)),
                 })
-                return True
+                return True, False
 
         if subtitle is False:
             min_sleep_interval = self.params.get('sleep_interval')
@@ -372,7 +372,7 @@ class FileDownloader(object):
                     '[download] Sleeping %s seconds...' % (
                         sleep_interval_sub))
                 time.sleep(sleep_interval_sub)
-        return self.real_download(filename, info_dict)
+        return self.real_download(filename, info_dict), True
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
index e3a0c821d..f3b243050 100644
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -890,7 +890,11 @@ def parseOpts(overrideArguments=None):
     extractor.add_option(
         '--sponskrub-cut', default=False,
         action='store_true', dest='sponskrub_cut',
-        help='Cut out the sponsor sections instead of simply marking them (Experimental)')
+        help='Cut out the sponsor sections instead of simply marking them')
+    extractor.add_option(
+        '--sponskrub-force', default=False,
+        action='store_true', dest='sponskrub_force',
+        help='Run sponskrub even if the video was already downloaded')
     extractor.add_option(
         '--sponskrub-location', metavar='PATH',
         dest='sponskrub_path', default='',
diff --git a/youtube_dlc/postprocessor/sponskrub.py b/youtube_dlc/postprocessor/sponskrub.py
index 1d7663a26..8ef612050 100644
--- a/youtube_dlc/postprocessor/sponskrub.py
+++ b/youtube_dlc/postprocessor/sponskrub.py
@@ -17,8 +17,9 @@ class SponSkrubPP(PostProcessor):
     _def_args = []
     _exe_name = 'sponskrub'
 
-    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False):
+    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False):
         PostProcessor.__init__(self, downloader)
+        self.force = force
         self.cutout = cut
         self.args = ['-chapter'] if not cut else []
         self.args += self._def_args if args is None else compat_shlex_split(args)
@@ -42,15 +43,19 @@ class SponSkrubPP(PostProcessor):
             return [], information
 
         if information['extractor_key'].lower() != 'youtube':
-            self._downloader.to_screen('[sponskrub] Skipping SponSkrub since it is not a YouTube video')
+            self._downloader.to_screen('[sponskrub] Skipping sponskrub since it is not a YouTube video')
+            return [], information
+        if self.cutout and not self.force and not information.get('__real_download', False):
+            self._downloader.to_screen(
+                '[sponskrub] Skipping sponskrub since the video was already downloaded. '
+                'Use --sponskrub-force to run sponskrub anyway')
             return [], information
 
         self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
         if self.cutout:
-            self._downloader.to_screen(
-                'WARNING: The sponsor segments are cut out from the video based on timestamp. '
-                'This will cause the subtitles to go out of sync. '
-                'Also, if run multiple times, unintended parts of the video could be cut out.')
+            self._downloader.to_screen('WARNING: Cutting out sponsor segments will cause the subtitles to go out of sync.')
+            if not information.get('__real_download', False):
+                self._downloader.to_screen('WARNING: If sponskrub is run multiple times, unintended parts of the video could be cut out.')
 
         filename = information['filepath']
         temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1]
@@ -64,7 +69,7 @@ class SponSkrubPP(PostProcessor):
         cmd = [encodeArgument(i) for i in cmd]
 
         if self._downloader.params.get('verbose', False):
-            self._downloader.to_screen('[debug] SponSkrub command line: %s' % shell_quote(cmd))
+            self._downloader.to_screen('[debug] sponskrub command line: %s' % shell_quote(cmd))
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
         stdout, stderr = p.communicate()
 
@@ -75,7 +80,7 @@ class SponSkrubPP(PostProcessor):
         elif p.returncode != 3:  # error code 3 means there was no info about the video
             stderr = stderr.decode('utf-8', 'replace')
             msg = stderr.strip().split('\n')[-1]
-            raise PostProcessingError(msg if msg else 'Sponskrub failed with error code %s!' % p.returncode)
+            raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s!' % p.returncode)
         else:
             self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database')
         return [], information