diff --git a/test/test_compat.py b/test/test_compat.py index 3aa9c0c51..6cc27d487 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Allow direct execution +import datetime as dt import os import sys import unittest @@ -12,7 +13,7 @@ import struct from yt_dlp import compat from yt_dlp.compat import urllib # isort: split -from yt_dlp.compat import compat_etree_fromstring, compat_expanduser +from yt_dlp.compat import compat_etree_fromstring, compat_expanduser, compat_datetime_from_timestamp from yt_dlp.compat.urllib.request import getproxies @@ -59,6 +60,45 @@ class TestCompat(unittest.TestCase): def test_struct_unpack(self): self.assertEqual(struct.unpack('!B', b'\x00'), (0,)) + def test_compat_datetime_from_timestamp(self): + self.assertEqual( + compat_datetime_from_timestamp(0), + dt.datetime(1970, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(1), + dt.datetime(1970, 1, 1, 0, 0, 1, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(3600), + dt.datetime(1970, 1, 1, 1, 0, 0, tzinfo=dt.timezone.utc)) + + self.assertEqual( + compat_datetime_from_timestamp(-1), + dt.datetime(1969, 12, 31, 23, 59, 59, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(-86400), + dt.datetime(1969, 12, 31, 0, 0, 0, tzinfo=dt.timezone.utc)) + + self.assertEqual( + compat_datetime_from_timestamp(0.5), + dt.datetime(1970, 1, 1, 0, 0, 0, 500000, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(1.000001), + dt.datetime(1970, 1, 1, 0, 0, 1, 1, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(-1.25), + dt.datetime(1969, 12, 31, 23, 59, 58, 750000, tzinfo=dt.timezone.utc)) + + self.assertEqual( + compat_datetime_from_timestamp(-1577923200), + dt.datetime(1920, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc)) + self.assertEqual( + compat_datetime_from_timestamp(4102444800), + dt.datetime(2100, 1, 1, 0, 
0, 0, tzinfo=dt.timezone.utc)) + + self.assertEqual( + compat_datetime_from_timestamp(173568960000), + dt.datetime(7470, 3, 8, 0, 0, 0, tzinfo=dt.timezone.utc)) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index dce07c362..9e70ad480 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -101,11 +101,13 @@ from yt_dlp.utils import ( remove_start, render_table, replace_extension, + datetime_round, rot47, sanitize_filename, sanitize_path, sanitize_url, shell_quote, + strftime_or_none, smuggle_url, str_to_int, strip_jsonp, @@ -409,6 +411,25 @@ class TestUtil(unittest.TestCase): self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto')) self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) + def test_datetime_round(self): + self.assertEqual(datetime_round(dt.datetime.strptime('1820-05-12T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ')), + dt.datetime(1820, 5, 12, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('1969-12-31T23:34:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'hour'), + dt.datetime(1970, 1, 1, 0, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'minute'), + dt.datetime(2024, 12, 25, 1, 24, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45.123Z', '%Y-%m-%dT%H:%M:%S.%fZ'), 'second'), + dt.datetime(2024, 12, 25, 1, 23, 45, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45.678Z', '%Y-%m-%dT%H:%M:%S.%fZ'), 'second'), + dt.datetime(2024, 12, 25, 1, 23, 46, tzinfo=dt.timezone.utc)) + + def test_strftime_or_none(self): + self.assertEqual(strftime_or_none(-4722192000), '18200512') + self.assertEqual(strftime_or_none(0), '19700101') + self.assertEqual(strftime_or_none(1735084800), '20241225') + # Throws 
OverflowError + self.assertEqual(strftime_or_none(1735084800000), None) + def test_daterange(self): _20century = DateRange('19000101', '20000101') self.assertFalse('17890714' in _20century) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 76a760a5a..08a1dc493 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2717,11 +2717,7 @@ class YoutubeDL: ('modified_timestamp', 'modified_date'), ): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - with contextlib.suppress(ValueError, OverflowError, OSError): - upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc) - info_dict[date_key] = upload_date.strftime('%Y%m%d') + info_dict[date_key] = strftime_or_none(info_dict[ts_key]) if not info_dict.get('release_year'): info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index d77962068..ad1268143 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -1,3 +1,4 @@ +import datetime as dt import os import xml.etree.ElementTree as etree @@ -27,6 +28,13 @@ def compat_ord(c): return c if isinstance(c, int) else ord(c) +def compat_datetime_from_timestamp(timestamp): + # Calling dt.datetime.fromtimestamp with negative timestamps throws error in Windows + # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/81708, + # https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 + return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + dt.timedelta(seconds=timestamp)) + + # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser diff --git a/yt_dlp/utils/_utils.py 
b/yt_dlp/utils/_utils.py index 3adc1d6be..6f6d85a7f 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -47,6 +47,7 @@ import xml.etree.ElementTree from . import traversal from ..compat import ( + compat_datetime_from_timestamp, compat_etree_fromstring, compat_expanduser, compat_HTMLParseError, @@ -1376,6 +1377,7 @@ def datetime_round(dt_, precision='day'): if precision == 'microsecond': return dt_ + time_scale = 1_000_000 unit_seconds = { 'day': 86400, 'hour': 3600, @@ -1383,8 +1385,8 @@ def datetime_round(dt_, precision='day'): 'second': 1, } roundto = lambda x, n: ((x + n / 2) // n) * n - timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision]) - return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc) + timestamp = roundto(calendar.timegm(dt_.timetuple()) + dt_.microsecond / time_scale, unit_seconds[precision]) + return compat_datetime_from_timestamp(timestamp) def hyphenate_date(date_str): @@ -2056,18 +2058,13 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): datetime_object = None try: if isinstance(timestamp, (int, float)): # unix timestamp - # Using naive datetime here can break timestamp() in Windows - # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414 - # Also, dt.datetime.fromtimestamp breaks for negative timestamps - # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 - datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc) - + dt.timedelta(seconds=timestamp)) + datetime_object = compat_datetime_from_timestamp(timestamp) elif isinstance(timestamp, str): # assume YYYYMMDD datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d') date_format = re.sub( # Support %s on windows r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format) return datetime_object.strftime(date_format) - except (ValueError, TypeError, AttributeError): + except (ValueError, TypeError, AttributeError, OverflowError, OSError): return default