From c1c9a79c49e8656f3244744e6f4e336e47a03206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 04:27:09 +0200 Subject: [PATCH 1/2] Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time. When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped. --- youtube_dl/YoutubeDL.py | 32 ++++++++++++++ youtube_dl/__init__.py | 4 ++ youtube_dl/utils.py | 96 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b9..1f5f75e309 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -84,6 +85,9 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + if self.in_download_archive(info_dict): + return (u'%(title)s) has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f23..a680d7c557 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--download-archive', metavar='FILE', + dest='download_archive', + help='Download only videos not present in the archive file. Record all downloaded videos in it.') authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'download_archive': opts.download_archive, }) if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99b..a463049a4d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -830,3 +830,99 @@ def get_cachedir(params={}): cache_root = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) + + +# Cross-platform file locking +if sys.platform == 'win32': + import ctypes.wintypes + import msvcrt + + class OVERLAPPED(ctypes.Structure): + _fields_ = [ + ('Internal', ctypes.wintypes.LPVOID), + ('InternalHigh', ctypes.wintypes.LPVOID), + ('Offset', ctypes.wintypes.DWORD), + ('OffsetHigh', ctypes.wintypes.DWORD), + ('hEvent', ctypes.wintypes.HANDLE), + ] + + kernel32 = ctypes.windll.kernel32 + LockFileEx = kernel32.LockFileEx + LockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwFlags + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + LockFileEx.restype = ctypes.wintypes.BOOL + UnlockFileEx = kernel32.UnlockFileEx + UnlockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + UnlockFileEx.restype = ctypes.wintypes.BOOL + whole_low = 0xffffffff + whole_high = 0x7fffffff + + def _lock_file(f, exclusive): + overlapped = OVERLAPPED() + overlapped.Offset = 0 + overlapped.OffsetHigh = 0 + overlapped.hEvent = 0 + f._lock_file_overlapped_p = ctypes.pointer(overlapped) + handle = msvcrt.get_osfhandle(f.fileno()) + if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Locking file failed: %r' % ctypes.FormatError()) + + def _unlock_file(f): + assert f._lock_file_overlapped_p + handle = msvcrt.get_osfhandle(f.fileno()) + if not UnlockFileEx(handle, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + +else: + import fcntl + + def _lock_file(f, exclusive): + fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + + def _unlock_file(f): + fcntl.lockf(f, fcntl.LOCK_UN) + + +class locked_file(object): + def __init__(self, filename, mode, encoding=None): + assert mode in ['r', 'a', 'w'] + self.f = io.open(filename, mode, encoding=encoding) + self.mode = mode + + def __enter__(self): + exclusive = self.mode != 'r' + try: + _lock_file(self.f, exclusive) + except IOError: + self.f.close() + raise + return self + + def __exit__(self, etype, value, traceback): + try: + _unlock_file(self.f) + finally: + self.f.close() + + def __iter__(self): + return iter(self.f) + + def write(self, *args): + return self.f.write(*args) + + def read(self, *args): + return self.f.read(*args) From ee6c9f95e1e5cf118b0bdf6abc8376bd95bc7dcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 16:28:36 +0200 Subject: [PATCH 2/2] Remove superfluous parenthesis --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1f5f75e309..856e9ac929 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -314,7 +314,7 @@ class YoutubeDL(object): if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) if self.in_download_archive(info_dict): - return (u'%(title)s) has already been recorded in archive' + return (u'%(title)s has already been recorded in archive' % info_dict) return None