Allow selecting which videos to download by their upload date (related to #137)

Only absolute dates (in the YYYYMMDD format) are supported.
pull/2/head
Jaime Marquínez Ferrándiz 12 years ago
parent 4c9f7a9988
commit bd55852517

@ -14,6 +14,7 @@ from youtube_dl.utils import timeconvert
from youtube_dl.utils import sanitize_filename from youtube_dl.utils import sanitize_filename
from youtube_dl.utils import unescapeHTML from youtube_dl.utils import unescapeHTML
from youtube_dl.utils import orderedSet from youtube_dl.utils import orderedSet
from youtube_dl.utils import DateRange
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
_compat_str = lambda b: b.decode('unicode-escape') _compat_str = lambda b: b.decode('unicode-escape')
@ -95,6 +96,14 @@ class TestUtil(unittest.TestCase):
def test_unescape_html(self): def test_unescape_html(self):
self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
def test_daterange(self):
    """Check DateRange membership for bounded and half-open ranges."""
    # Range bounded on both sides: a date before the start is outside it.
    twentieth_century = DateRange("19000101","20000101")
    self.assertTrue("17890714" not in twentieth_century)

    # Only a start is given: any later date belongs to the range.
    anno_domini = DateRange("00010101")
    self.assertTrue("19690721" in anno_domini)

    # Only an end is given: any earlier date belongs to the range.
    first_millennium = DateRange(end="10000101")
    self.assertTrue("07110427" in first_millennium)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -89,6 +89,7 @@ class FileDownloader(object):
keepvideo: Keep the video file after post-processing keepvideo: Keep the video file after post-processing
min_filesize: Skip files smaller than this size min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size max_filesize: Skip files larger than this size
daterange: A DateRange object, download only if the upload_date is in the range.
""" """
params = None params = None
@ -424,6 +425,11 @@ class FileDownloader(object):
if rejecttitle: if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE): if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return None return None
def extract_info(self, url, download = True, ie_name = None): def extract_info(self, url, download = True, ie_name = None):

@ -157,6 +157,9 @@ def parseOpts(overrideArguments=None):
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@ -447,6 +450,10 @@ def _real_main(argv=None):
if opts.recodevideo is not None: if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
parser.error(u'invalid video recode format specified') parser.error(u'invalid video recode format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
else:
date = DateRange(opts.dateafter, opts.datebefore)
if sys.version_info < (3,): if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
@ -513,7 +520,8 @@ def _real_main(argv=None):
'test': opts.test, 'test': opts.test,
'keepvideo': opts.keepvideo, 'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize, 'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize 'max_filesize': opts.max_filesize,
'daterange': date
}) })
if opts.verbose: if opts.verbose:

@ -12,6 +12,7 @@ import traceback
import zlib import zlib
import email.utils import email.utils
import json import json
import datetime
try: try:
import urllib.request as compat_urllib_request import urllib.request as compat_urllib_request
@ -568,3 +569,32 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_request = http_request https_request = http_request
https_response = http_response https_response = http_response
def date_from_str(date_str):
    """Parse a string in the format YYYYMMDD and return a datetime.date."""
    parsed = datetime.datetime.strptime(date_str, "%Y%m%d")
    # strptime yields a full datetime; callers only need the calendar day.
    return parsed.date()
class DateRange(object):
    """Inclusive interval between two calendar dates.

    The bounds are stored as datetime.date objects in self.start and
    self.end. Membership tests ("x in date_range") treat both bounds as
    part of the range.
    """

    def __init__(self, start=None, end=None):
        """Build a range from optional date strings.

        start -- first day of the range, in the format accepted by
                 date_from_str; None means the earliest representable date.
        end   -- last day of the range, in the format accepted by
                 date_from_str; None means the latest representable date.

        Raises ValueError if start is later than end, or if date_from_str
        rejects one of the strings.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.date.min
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.date.max
        # Equal bounds are valid: that is exactly the single-day range
        # produced by day(). Only a reversed range is an error.
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must not be after the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range (bounds included).

        date may be a string in the format accepted by date_from_str, a
        datetime.date, or a datetime.datetime (only its date part is used).
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date but cannot be compared with
            # plain dates, so reduce it to its calendar day first.
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())

Loading…
Cancel
Save