From bcda6e49b060e9e8512fcb831f6bcff1eb493a98 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 25 Dec 2024 09:40:13 +0000 Subject: [PATCH 1/6] [core] Calculate date by `strftime_or_none`; fix seconds in `datetime_round` - Negative timestamps are not accepted by `datetime.fromtimestamp` on Windows. - Scale the timestamp up 1000000 times so that microseconds are taken into account when rounding. --- test/test_utils.py | 19 +++++++++++++++++++ yt_dlp/YoutubeDL.py | 6 +----- yt_dlp/utils/_utils.py | 23 +++++++++++++++-------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b3de14198e..b7c458b74d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -98,11 +98,13 @@ from yt_dlp.utils import ( remove_start, render_table, replace_extension, + datetime_round, rot47, sanitize_filename, sanitize_path, sanitize_url, shell_quote, + strftime_or_none, smuggle_url, str_to_int, strip_jsonp, @@ -392,6 +394,23 @@ class TestUtil(unittest.TestCase): self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto')) self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) + def test_datetime_round(self): + self.assertEqual(datetime_round(dt.datetime.strptime('1820-05-12T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ')), + dt.datetime(1820, 5, 12, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('1969-12-31T23:34:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'hour'), + dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'minute'), + dt.datetime(2024, 12, 25, 1, 24, tzinfo=dt.timezone.utc)) + self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45.123Z', '%Y-%m-%dT%H:%M:%S.%fZ'), 'second'), + dt.datetime(2024, 12, 25, 1, 23, 45, tzinfo=dt.timezone.utc)) + 
self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45.678Z', '%Y-%m-%dT%H:%M:%S.%fZ'), 'second'), + dt.datetime(2024, 12, 25, 1, 23, 46, tzinfo=dt.timezone.utc)) + + def test_strftime_or_none(self): + self.assertEqual(strftime_or_none(-4722192000), '18200512') + self.assertEqual(strftime_or_none(0), '19700101') + self.assertEqual(strftime_or_none(1735084800), '20241225') + def test_daterange(self): _20century = DateRange('19000101', '20000101') self.assertFalse('17890714' in _20century) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 764baf3a00..d07671de57 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2673,11 +2673,7 @@ class YoutubeDL: ('modified_timestamp', 'modified_date'), ): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - with contextlib.suppress(ValueError, OverflowError, OSError): - upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc) - info_dict[date_key] = upload_date.strftime('%Y%m%d') + info_dict[date_key] = strftime_or_none(info_dict[ts_key]) if not info_dict.get('release_year'): info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 699bf1e7f6..a75af33c5b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1364,6 +1364,17 @@ def datetime_add_months(dt_, months): return dt_.replace(year, month, day) +def datetime_from_timestamp(timestamp): + # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, + # see http://bugs.python.org/issue1646728) + # Using naive datetime here can break timestamp() in Windows + # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414 + # Also, dt.datetime.fromtimestamp breaks for negative timestamps + # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 + return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + + dt.timedelta(seconds=timestamp)) + + def datetime_round(dt_, precision='day'): """ Round a datetime object's time to a specific precision @@ -1371,6 +1382,7 @@ def datetime_round(dt_, precision='day'): if precision == 'microsecond': return dt_ + time_scale = 1000000 unit_seconds = { 'day': 86400, 'hour': 3600, @@ -1378,8 +1390,8 @@ def datetime_round(dt_, precision='day'): 'second': 1, } roundto = lambda x, n: ((x + n / 2) // n) * n - timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision]) - return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc) + timestamp = roundto(calendar.timegm(dt_.timetuple()) * time_scale + dt_.microsecond, unit_seconds[precision] * time_scale) / time_scale + return datetime_from_timestamp(timestamp) def hyphenate_date(date_str): @@ -2047,12 +2059,7 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): datetime_object = None try: if isinstance(timestamp, (int, float)): # unix timestamp - # Using naive datetime here can break timestamp() in Windows - # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414 - # Also, dt.datetime.fromtimestamp breaks for negative timestamps - # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 - datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc) - + dt.timedelta(seconds=timestamp)) + datetime_object = datetime_from_timestamp(timestamp) elif isinstance(timestamp, str): # assume YYYYMMDD datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d') 
date_format = re.sub( # Support %s on windows From 5b6ad82daef68b7f37531a307c36050b0941f1e5 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 25 Dec 2024 15:02:25 +0000 Subject: [PATCH 2/6] grouping digits with underscores More readable. Since Python 3.6: https://docs.python.org/3/library/functions.html#int Co-authored-by: pukkandan --- yt_dlp/utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index a75af33c5b..526c3d9d04 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1382,7 +1382,7 @@ def datetime_round(dt_, precision='day'): if precision == 'microsecond': return dt_ - time_scale = 1000000 + time_scale = 1_000_000 unit_seconds = { 'day': 86400, 'hour': 3600, From 58f7f5479b35b191e2df5d80389be134f2fd6c69 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:37:38 +0000 Subject: [PATCH 3/6] cleaner formula Co-authored-by: pukkandan --- yt_dlp/utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 526c3d9d04..53a0b6b00b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1390,7 +1390,7 @@ def datetime_round(dt_, precision='day'): 'second': 1, } roundto = lambda x, n: ((x + n / 2) // n) * n - timestamp = roundto(calendar.timegm(dt_.timetuple()) * time_scale + dt_.microsecond, unit_seconds[precision] * time_scale) / time_scale + timestamp = roundto(calendar.timegm(dt_.timetuple()) + dt_.microsecond / time_scale, unit_seconds[precision]) return datetime_from_timestamp(timestamp) From 9cde8563a65976ceeb786eff98aff8214a2842e0 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:40:17 +0000 Subject: [PATCH 4/6] round to hours --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/test_utils.py b/test/test_utils.py index b7c458b74d..a847d9ad29 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -398,7 +398,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(datetime_round(dt.datetime.strptime('1820-05-12T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ')), dt.datetime(1820, 5, 12, tzinfo=dt.timezone.utc)) self.assertEqual(datetime_round(dt.datetime.strptime('1969-12-31T23:34:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'hour'), - dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)) + dt.datetime(1970, 1, 1, 0, tzinfo=dt.timezone.utc)) self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45Z', '%Y-%m-%dT%H:%M:%SZ'), 'minute'), dt.datetime(2024, 12, 25, 1, 24, tzinfo=dt.timezone.utc)) self.assertEqual(datetime_round(dt.datetime.strptime('2024-12-25T01:23:45.123Z', '%Y-%m-%dT%H:%M:%S.%fZ'), 'second'), From 0fcc7521b76dc779246936fc7a42e437e2546f7e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:46:22 +0000 Subject: [PATCH 5/6] rewording the comment for now --- yt_dlp/utils/_utils.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 53a0b6b00b..548ebdd635 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1365,12 +1365,9 @@ def datetime_add_months(dt_, months): def datetime_from_timestamp(timestamp): - # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, - # see http://bugs.python.org/issue1646728) - # Using naive datetime here can break timestamp() in Windows - # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414 - # Also, dt.datetime.fromtimestamp breaks for negative timestamps - # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 + # Calling dt.datetime.fromtimestamp with negative timestamps throws error in Windows + # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414, + # https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + dt.timedelta(seconds=timestamp)) From 24954d0542cad8244f078209cc7c5e35d8b0f728 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 29 Jan 2025 13:43:18 +0000 Subject: [PATCH 6/6] the line is not too long --- yt_dlp/utils/_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 2feb54f41b..de61e14fd1 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1371,8 +1371,7 @@ def datetime_from_timestamp(timestamp): # Calling dt.datetime.fromtimestamp with negative timestamps throws error in Windows # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414, # https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642 - return (dt.datetime.fromtimestamp(0, dt.timezone.utc) - + dt.timedelta(seconds=timestamp)) + return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + dt.timedelta(seconds=timestamp)) def datetime_round(dt_, precision='day'):