From 9eb3124219dd7bbe9c9c15c5479cc5a57587d883 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 00:23:24 -0500 Subject: [PATCH 01/16] WIP commit --- yt_dlp/YoutubeDL.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 764baf3a00..5856a9cbd4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1424,8 +1424,20 @@ class YoutubeDL: return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): + print(outtmpl) outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) - return self.escape_outtmpl(outtmpl) % info_dict + ext_suffix = '.%(ext\x00s)s' # not sure why this has null char + suffix = '' + if outtmpl.endswith(ext_suffix): + outtmpl = outtmpl[:-len(ext_suffix)] + suffix = ext_suffix % info_dict + outtmpl = self.escape_outtmpl(outtmpl) + filename = outtmpl % info_dict + encoding = sys.getfilesystemencoding() # make option to override + filename = filename.encode(encoding) + filename = filename[:255 - len('.flac.part')] # make option to override + filename = filename.decode(encoding, 'ignore') + return filename + suffix @_catch_unsafe_extension_error def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): @@ -1435,6 +1447,7 @@ class YoutubeDL: try: outtmpl = self._outtmpl_expandpath(outtmpl) filename = self.evaluate_outtmpl(outtmpl, info_dict, True) + print(filename) if not filename: return None From 8c01f4a92d7b0e101c72506d9a30c127f28dccad Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 13:44:12 -0500 Subject: [PATCH 02/16] Complete implementation --- yt_dlp/YoutubeDL.py | 53 ++++++++++++++++++++++++++++++++++++------ yt_dlp/__init__.py | 2 ++ yt_dlp/options.py | 8 +++++++ yt_dlp/utils/_utils.py | 6 +++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5856a9cbd4..eb2e5211aa 100644 --- 
a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -70,6 +70,7 @@ from .update import ( ) from .utils import ( DEFAULT_OUTTMPL, + DEFAULT_MAX_FILE_NAME, IDENTITY, LINK_TEMPLATES, MEDIA_EXTENSIONS, @@ -266,6 +267,8 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) + max_file_name: Limit length of filename (extension included) + filesystem_encoding: Encoding to use when calculating filename length in bytes windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally This option has no effect when running on Windows @@ -1424,7 +1427,10 @@ class YoutubeDL: return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): - print(outtmpl) + outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) + return self.escape_outtmpl(outtmpl) % info_dict + + def evaluate_outtmpl_for_filename(self, outtmpl, info_dict, *args, **kwargs): outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) ext_suffix = '.%(ext\x00s)s' # not sure why this has null char suffix = '' @@ -1433,10 +1439,44 @@ class YoutubeDL: suffix = ext_suffix % info_dict outtmpl = self.escape_outtmpl(outtmpl) filename = outtmpl % info_dict - encoding = sys.getfilesystemencoding() # make option to override - filename = filename.encode(encoding) - filename = filename[:255 - len('.flac.part')] # make option to override - filename = filename.decode(encoding, 'ignore') + + def parse_max_file_name(max_file_name: str): + try: + max_length = int(max_file_name[:-1]) + except ValueError: + raise ValueError('Invalid --max-filename-length specified') + + if max_file_name[-1].lower() == 'c': + return 'c', max_length + elif max_file_name[-1].lower() == 'b': + return 'b', max_length + else: + raise 
ValueError("--max-filename-length must end with 'b' or 'c'") + + max_file_name = self.params.get('max_file_name', DEFAULT_MAX_FILE_NAME) + mode, max_file_name = parse_max_file_name(max_file_name) + encoding = self.params.get('filesystem_encoding', sys.getfilesystemencoding()) + + # extension may be replaced later + if mode == 'b': + max_suffix_len = len('.annotations.xml'.encode(encoding)) + else: + max_suffix_len = len('.annotations.xml') + + def trim_filename(name: str, length: int): + if length < 1: + raise ValueError('Cannot trim filename to such short length') + if mode == 'b': + name = name.encode(encoding) + name = name[:length] + return name.decode(encoding, 'ignore') + else: + return name[:length] + + # only trim last component of path - assume the directories are valid names + head, tail = os.path.split(filename) + tail = trim_filename(tail, max_file_name - max_suffix_len) + filename = os.path.join(head, tail) return filename + suffix @_catch_unsafe_extension_error @@ -1446,8 +1486,7 @@ class YoutubeDL: outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) try: outtmpl = self._outtmpl_expandpath(outtmpl) - filename = self.evaluate_outtmpl(outtmpl, info_dict, True) - print(filename) + filename = self.evaluate_outtmpl_for_filename(outtmpl, info_dict, True) if not filename: return None diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b1..00817b0e5e 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -886,6 +886,8 @@ def parse_options(argv=None): 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, + 'max_file_name': opts.max_file_name, + 'filesystem_encoding': opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 06b65e0eac..7025928a05 100644 --- a/yt_dlp/options.py +++ 
b/yt_dlp/options.py @@ -1380,6 +1380,14 @@ def create_parser(): '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, help='Limit the filename length (excluding extension) to the specified number of characters') + filesystem.add_option( + '--max-filename-length', metavar='LENGTH', + dest='max_file_name', + help='Limit the filename length (including extension) to the specified number of characters or bytes') + filesystem.add_option( + '--filesystem-encoding', metavar='ENCODING', + dest='filesystem_encoding', + help='Override filesystem encoding used when calculating filename length in bytes') filesystem.add_option( '-w', '--no-overwrites', action='store_false', dest='overwrites', default=None, diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 699bf1e7f6..90172125cd 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2852,6 +2852,12 @@ OUTTMPL_TYPES = { 'pl_infojson': 'info.json', } +# https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits +if platform.system() in ('Darwin', 'Windows'): + DEFAULT_MAX_FILE_NAME = '255c' +else: + DEFAULT_MAX_FILE_NAME = '255b' + # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting From fa6e2f83aa7633fc1de86f8156f613157195c98f Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 14:42:31 -0500 Subject: [PATCH 03/16] Use --trim-filenames option --- yt_dlp/YoutubeDL.py | 34 +++++++++++++--------------------- yt_dlp/__init__.py | 1 - yt_dlp/options.py | 8 ++------ yt_dlp/utils/_utils.py | 4 ++-- 4 files changed, 17 insertions(+), 30 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eb2e5211aa..9b58d80395 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -267,7 +267,6 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. 
restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) - max_file_name: Limit length of filename (extension included) filesystem_encoding: Encoding to use when calculating filename length in bytes windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally @@ -1441,31 +1440,31 @@ class YoutubeDL: filename = outtmpl % info_dict def parse_max_file_name(max_file_name: str): + # old --trim-filenames format + try: + return 'c', int(max_file_name) + except ValueError: + pass + try: max_length = int(max_file_name[:-1]) except ValueError: - raise ValueError('Invalid --max-filename-length specified') + raise ValueError('Invalid --trim-filenames specified') if max_file_name[-1].lower() == 'c': return 'c', max_length elif max_file_name[-1].lower() == 'b': return 'b', max_length else: - raise ValueError("--max-filename-length must end with 'b' or 'c'") + raise ValueError("--trim-filenames must end with 'b' or 'c'") - max_file_name = self.params.get('max_file_name', DEFAULT_MAX_FILE_NAME) + max_file_name = self.params.get('trim_file_name') or DEFAULT_MAX_FILE_NAME mode, max_file_name = parse_max_file_name(max_file_name) - encoding = self.params.get('filesystem_encoding', sys.getfilesystemencoding()) - - # extension may be replaced later - if mode == 'b': - max_suffix_len = len('.annotations.xml'.encode(encoding)) - else: - max_suffix_len = len('.annotations.xml') + if max_file_name < 1: + raise ValueError('Invalid --trim-filenames specified') + encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() def trim_filename(name: str, length: int): - if length < 1: - raise ValueError('Cannot trim filename to such short length') if mode == 'b': name = name.encode(encoding) name = name[:length] @@ -1475,7 +1474,7 @@ class YoutubeDL: # only trim last component of path - assume the directories are valid names head, tail = os.path.split(filename) - tail 
= trim_filename(tail, max_file_name - max_suffix_len) + tail = trim_filename(tail, max_file_name) filename = os.path.join(head, tail) return filename + suffix @@ -1498,13 +1497,6 @@ class YoutubeDL: force_ext = OUTTMPL_TYPES[tmpl_type] if force_ext: filename = replace_extension(filename, force_ext, info_dict.get('ext')) - - # https://github.com/blackjack4494/youtube-dlc/issues/85 - trim_file_name = self.params.get('trim_file_name', False) - if trim_file_name: - no_ext, *ext = filename.rsplit('.', 2) - filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.') - return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 00817b0e5e..69d1e9ed36 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -886,7 +886,6 @@ def parse_options(argv=None): 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, - 'max_file_name': opts.max_file_name, 'filesystem_encoding': opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 7025928a05..49387a8216 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1378,12 +1378,8 @@ def create_parser(): help='Sanitize filenames only minimally') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', - dest='trim_file_name', default=0, type=int, - help='Limit the filename length (excluding extension) to the specified number of characters') - filesystem.add_option( - '--max-filename-length', metavar='LENGTH', - dest='max_file_name', - help='Limit the filename length (including extension) to the specified number of characters or bytes') + dest='trim_file_name', + help='Limit the filename length (excluding extension) to the specified number of characters or bytes') 
filesystem.add_option( '--filesystem-encoding', metavar='ENCODING', dest='filesystem_encoding', diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 90172125cd..15e92dfecf 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2854,9 +2854,9 @@ OUTTMPL_TYPES = { # https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits if platform.system() in ('Darwin', 'Windows'): - DEFAULT_MAX_FILE_NAME = '255c' + DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml")}c' else: - DEFAULT_MAX_FILE_NAME = '255b' + DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml".encode(sys.getfilesystemencoding()))}b' # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type From 1e82fe3bfed438732c8735f8612a1e3e34b5f596 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 14:50:44 -0500 Subject: [PATCH 04/16] Allow --trim-filenames=0 and update tests --- test/test_YoutubeDL.py | 4 ++-- yt_dlp/YoutubeDL.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 6b022a7eaa..48164a7b5a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -717,7 +717,7 @@ class TestYoutubeDL(unittest.TestCase): ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) - out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) + out = ydl.evaluate_outtmpl_for_filename(tmpl, info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): @@ -791,7 +791,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(got_dict.get(info_field), expected, info_field) return True - test('%()j', (expect_same_infodict, None)) + test('%()j', (expect_same_infodict, None), trim_file_name=0) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 
9b58d80395..54cd2b66ab 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1458,10 +1458,16 @@ class YoutubeDL: else: raise ValueError("--trim-filenames must end with 'b' or 'c'") - max_file_name = self.params.get('trim_file_name') or DEFAULT_MAX_FILE_NAME + max_file_name = self.params.get('trim_file_name') + if max_file_name is None: + max_file_name = DEFAULT_MAX_FILE_NAME mode, max_file_name = parse_max_file_name(max_file_name) - if max_file_name < 1: + if max_file_name < 0: raise ValueError('Invalid --trim-filenames specified') + if max_file_name == 0: + # no maximum + return filename + suffix + encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() def trim_filename(name: str, length: int): From a6068e695cb768a17d772185a20fbd77f73b8e2a Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 15:19:52 -0500 Subject: [PATCH 05/16] Update tests --- test/test_YoutubeDL.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 48164a7b5a..bc3b63233d 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -697,6 +697,7 @@ class TestYoutubeDL(unittest.TestCase): 'title3': 'foo/bar\\test', 'title4': 'foo "bar" test', 'title5': 'Ñéí 𝐀', + 'title6': 'あ' * 10, 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, @@ -712,6 +713,8 @@ class TestYoutubeDL(unittest.TestCase): def test_prepare_outtmpl_and_filename(self): def test(tmpl, expected, *, info=None, **params): + if 'trim_file_name' not in params: + params['trim_file_name'] = 0 # disable trimming params['outtmpl'] = tmpl ydl = FakeYDL(params) ydl._num_downloads = 1 @@ -791,7 +794,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(got_dict.get(info_field), expected, info_field) return True - test('%()j', (expect_same_infodict, None), trim_file_name=0) + test('%()j', (expect_same_infodict, None)) # NA placeholder NA_TEST_OUTTMPL = 
'%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' @@ -924,6 +927,16 @@ class TestYoutubeDL(unittest.TestCase): test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test')) + # --trim-filenames + test('%(title6)s.%(ext)s', 'あ' * 10 + '.mp4') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='3c') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='9b', filesystem_encoding='utf-8') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='10b', filesystem_encoding='utf-8') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b', filesystem_encoding='utf-8') + test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-8') + test('%(title6)s.%(ext)s', 'あ' * 6 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-16le') + test('folder/%(title6)s.%(ext)s', f'folder{os.path.sep}あああ.mp4', trim_file_name='3c') + def test_format_note(self): ydl = YoutubeDL() self.assertEqual(ydl._format_note({}), '') From 200d27682d8c624c308eb3a3ad814fe499dbe71f Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 15:49:50 -0500 Subject: [PATCH 06/16] Refactor --- test/test_YoutubeDL.py | 2 +- yt_dlp/YoutubeDL.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index bc3b63233d..4348cac6f0 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -720,7 +720,7 @@ class TestYoutubeDL(unittest.TestCase): ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) - out = ydl.evaluate_outtmpl_for_filename(tmpl, info or self.outtmpl_info) + out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info, trim_filename=True) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 54cd2b66ab..2e95039ed0 100644 --- 
a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1425,12 +1425,11 @@ class YoutubeDL: return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT - def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): + def evaluate_outtmpl(self, outtmpl, info_dict, *args, trim_filename=False, **kwargs): outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) - return self.escape_outtmpl(outtmpl) % info_dict + if not trim_filename: + return self.escape_outtmpl(outtmpl) % info_dict - def evaluate_outtmpl_for_filename(self, outtmpl, info_dict, *args, **kwargs): - outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) ext_suffix = '.%(ext\x00s)s' # not sure why this has null char suffix = '' if outtmpl.endswith(ext_suffix): @@ -1491,7 +1490,7 @@ class YoutubeDL: outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) try: outtmpl = self._outtmpl_expandpath(outtmpl) - filename = self.evaluate_outtmpl_for_filename(outtmpl, info_dict, True) + filename = self.evaluate_outtmpl(outtmpl, info_dict, True, trim_filename=True) if not filename: return None From ba9cfb23f2f6a640de4bd50079c02f9400d90210 Mon Sep 17 00:00:00 2001 From: gavin <32209764+7x11x13@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:57:48 -0500 Subject: [PATCH 07/16] Update yt_dlp/YoutubeDL.py Co-authored-by: pukkandan --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2e95039ed0..d1b4a7c25b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1430,7 +1430,7 @@ class YoutubeDL: if not trim_filename: return self.escape_outtmpl(outtmpl) % info_dict - ext_suffix = '.%(ext\x00s)s' # not sure why this has null char + ext_suffix = '.%(ext\0s)s' suffix = '' if outtmpl.endswith(ext_suffix): outtmpl = outtmpl[:-len(ext_suffix)] From 8949a4fef0a359d88bda56007a8804e7cd9e4b10 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 8 Jan 2025 
13:03:15 -0500 Subject: [PATCH 08/16] Refactor, make default notrim --- yt_dlp/YoutubeDL.py | 27 ++------------------------- yt_dlp/__init__.py | 17 +++++++++++++++++ yt_dlp/options.py | 2 +- yt_dlp/utils/_utils.py | 6 ------ 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d1b4a7c25b..aec9f7a00e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -70,7 +70,6 @@ from .update import ( ) from .utils import ( DEFAULT_OUTTMPL, - DEFAULT_MAX_FILE_NAME, IDENTITY, LINK_TEMPLATES, MEDIA_EXTENSIONS, @@ -267,6 +266,7 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) + trim_file_name_mode: Mode of filename trimming ('c' for characters or 'b' for bytes) filesystem_encoding: Encoding to use when calculating filename length in bytes windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally @@ -1438,31 +1438,8 @@ class YoutubeDL: outtmpl = self.escape_outtmpl(outtmpl) filename = outtmpl % info_dict - def parse_max_file_name(max_file_name: str): - # old --trim-filenames format - try: - return 'c', int(max_file_name) - except ValueError: - pass - - try: - max_length = int(max_file_name[:-1]) - except ValueError: - raise ValueError('Invalid --trim-filenames specified') - - if max_file_name[-1].lower() == 'c': - return 'c', max_length - elif max_file_name[-1].lower() == 'b': - return 'b', max_length - else: - raise ValueError("--trim-filenames must end with 'b' or 'c'") - max_file_name = self.params.get('trim_file_name') - if max_file_name is None: - max_file_name = DEFAULT_MAX_FILE_NAME - mode, max_file_name = parse_max_file_name(max_file_name) - if max_file_name < 0: - raise ValueError('Invalid --trim-filenames specified') + mode = self.params.get('trim_file_name_mode') if max_file_name == 0: # no maximum 
return filename + suffix diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 69d1e9ed36..b22c4c1dd5 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,3 +1,4 @@ +import codecs import sys if sys.version_info < (3, 9): @@ -429,6 +430,21 @@ def validate_options(opts): } # Other options + opts.trim_file_name_mode = 'c' + if opts.trim_file_name is not None: + mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|notrim)', opts.trim_file_name) + validate(mobj, 'trim filenames', opts.trim_file_name) + if opts.trim_file_name == 'notrim': + opts.trim_file_name = 0 + else: + opts.trim_file_name = int(mobj.group('length')) + opts.trim_file_name_mode = mobj.group('mode') or 'c' + if opts.filesystem_encoding is not None: + try: + codecs.lookup(opts.filesystem_encoding) + except LookupError: + raise ValueError(f'Invalid filesystem encoding: {opts.filesystem_encoding}') + if opts.playlist_items is not None: try: tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) @@ -886,6 +902,7 @@ def parse_options(argv=None): 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, + 'trim_file_name_mode': opts.trim_file_name_mode, 'filesystem_encoding': opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 49387a8216..ba1b55b14e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1378,7 +1378,7 @@ def create_parser(): help='Sanitize filenames only minimally') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', - dest='trim_file_name', + dest='trim_file_name', default='notrim', help='Limit the filename length (excluding extension) to the specified number of characters or bytes') filesystem.add_option( '--filesystem-encoding', metavar='ENCODING', diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 15e92dfecf..699bf1e7f6 100644 --- 
a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2852,12 +2852,6 @@ OUTTMPL_TYPES = { 'pl_infojson': 'info.json', } -# https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits -if platform.system() in ('Darwin', 'Windows'): - DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml")}c' -else: - DEFAULT_MAX_FILE_NAME = f'{255 - len(".annotations.xml".encode(sys.getfilesystemencoding()))}b' - # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting From 25dca22cb7b8a16d49fb4b4457688be805e4e53c Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 8 Jan 2025 13:18:43 -0500 Subject: [PATCH 09/16] Trim all path parts --- yt_dlp/YoutubeDL.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index aec9f7a00e..72250fb348 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -12,6 +12,7 @@ import json import locale import operator import os +from pathlib import Path import random import re import shutil @@ -1446,18 +1447,15 @@ class YoutubeDL: encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() - def trim_filename(name: str, length: int): + def trim_filename(name: str): if mode == 'b': name = name.encode(encoding) - name = name[:length] + name = name[:max_file_name] return name.decode(encoding, 'ignore') else: - return name[:length] + return name[:max_file_name] - # only trim last component of path - assume the directories are valid names - head, tail = os.path.split(filename) - tail = trim_filename(tail, max_file_name) - filename = os.path.join(head, tail) + filename = os.path.join(*map(trim_filename, Path(filename).parts)) return filename + suffix @_catch_unsafe_extension_error From ac69f9474b3e6f27f5563ded45cf99a420b1dd58 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 8 Jan 2025 13:29:18 -0500 Subject: [PATCH 10/16] 
Refactor, update tests --- test/test_YoutubeDL.py | 4 ++-- yt_dlp/YoutubeDL.py | 10 +++++++--- yt_dlp/__init__.py | 12 ++---------- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 4348cac6f0..d65cc11539 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -714,7 +714,7 @@ class TestYoutubeDL(unittest.TestCase): def test_prepare_outtmpl_and_filename(self): def test(tmpl, expected, *, info=None, **params): if 'trim_file_name' not in params: - params['trim_file_name'] = 0 # disable trimming + params['trim_file_name'] = 'notrim' # disable trimming params['outtmpl'] = tmpl ydl = FakeYDL(params) ydl._num_downloads = 1 @@ -935,7 +935,7 @@ class TestYoutubeDL(unittest.TestCase): test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b', filesystem_encoding='utf-8') test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-8') test('%(title6)s.%(ext)s', 'あ' * 6 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-16le') - test('folder/%(title6)s.%(ext)s', f'folder{os.path.sep}あああ.mp4', trim_file_name='3c') + test('folder/%(title6)s.%(ext)s', f'fol{os.path.sep}あああ.mp4', trim_file_name='3c') def test_format_note(self): ydl = YoutubeDL() diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 72250fb348..2875560752 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -267,7 +267,6 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. 
restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) - trim_file_name_mode: Mode of filename trimming ('c' for characters or 'b' for bytes) filesystem_encoding: Encoding to use when calculating filename length in bytes windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally @@ -1439,8 +1438,13 @@ class YoutubeDL: outtmpl = self.escape_outtmpl(outtmpl) filename = outtmpl % info_dict - max_file_name = self.params.get('trim_file_name') - mode = self.params.get('trim_file_name_mode') + def parse_trim_file_name(trim_file_name): + if trim_file_name is None or trim_file_name == 'notrim': + return 0, None + mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|notrim)', trim_file_name) + return int(mobj.group('length')), mobj.group('mode') or 'c' + + max_file_name, mode = parse_trim_file_name(self.params.get('trim_file_name')) if max_file_name == 0: # no maximum return filename + suffix diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b22c4c1dd5..69fb2c7e5d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -430,15 +430,8 @@ def validate_options(opts): } # Other options - opts.trim_file_name_mode = 'c' - if opts.trim_file_name is not None: - mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|notrim)', opts.trim_file_name) - validate(mobj, 'trim filenames', opts.trim_file_name) - if opts.trim_file_name == 'notrim': - opts.trim_file_name = 0 - else: - opts.trim_file_name = int(mobj.group('length')) - opts.trim_file_name_mode = mobj.group('mode') or 'c' + validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|notrim)') + if opts.filesystem_encoding is not None: try: codecs.lookup(opts.filesystem_encoding) @@ -902,7 +895,6 @@ def parse_options(argv=None): 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, - 'trim_file_name_mode': opts.trim_file_name_mode, 
'filesystem_encoding': 
opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, From 8063b142d98dca27cdd5eb104d13b312b8557897 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 8 Jan 2025 15:00:02 -0500 Subject: [PATCH 11/16] Remove --filesystem-encoding option --- test/test_YoutubeDL.py | 15 ++++++++++----- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/__init__.py | 8 -------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index d65cc11539..bb7d81484b 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -2,6 +2,7 @@ # Allow direct execution import os +import platform import sys import unittest from unittest.mock import patch @@ -930,11 +931,15 @@ class TestYoutubeDL(unittest.TestCase): # --trim-filenames test('%(title6)s.%(ext)s', 'あ' * 10 + '.mp4') test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='3c') - test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='9b', filesystem_encoding='utf-8') - test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='10b', filesystem_encoding='utf-8') - test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b', filesystem_encoding='utf-8') - test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-8') - test('%(title6)s.%(ext)s', 'あ' * 6 + '.mp4', trim_file_name='12b', filesystem_encoding='utf-16le') + if sys.getfilesystemencoding() == 'utf-8' and platform.system() != 'Windows': + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='9b') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='10b') + test('%(title6)s.%(ext)s', 'あ' * 3 + '.mp4', trim_file_name='11b') + test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='12b') + elif platform.system() == 'Windows': + test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='8b') + test('%(title6)s.%(ext)s', 'あ' * 4 + '.mp4', trim_file_name='9b') + test('%(title6)s.%(ext)s', 'あ' * 5 + '.mp4', 
trim_file_name='10b') test('folder/%(title6)s.%(ext)s', f'fol{os.path.sep}あああ.mp4', trim_file_name='3c') def test_format_note(self): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2875560752..2c8d54d86a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -13,6 +13,7 @@ import locale import operator import os from pathlib import Path +import platform import random import re import shutil @@ -267,7 +268,6 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) - filesystem_encoding: Encoding to use when calculating filename length in bytes windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally This option has no effect when running on Windows @@ -1449,7 +1449,7 @@ class YoutubeDL: # no maximum return filename + suffix - encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() + encoding = sys.getfilesystemencoding() if platform.system() != 'Windows' else 'utf-16-le' def trim_filename(name: str): if mode == 'b': diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 69fb2c7e5d..e49e73fef9 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,4 +1,3 @@ -import codecs import sys if sys.version_info < (3, 9): @@ -432,12 +431,6 @@ def validate_options(opts): # Other options validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|notrim)') - if opts.filesystem_encoding is not None: - try: - codecs.lookup(opts.filesystem_encoding) - except LookupError: - raise ValueError(f'Invalid filesystem encoding: {opts.filesystem_encoding}') - if opts.playlist_items is not None: try: tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) @@ -895,7 +888,6 @@ def parse_options(argv=None): 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': 
opts.trim_file_name, - 'filesystem_encoding': opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, From 5c1f6e732927765d1463de991dab52837d2fc393 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 8 Jan 2025 15:06:14 -0500 Subject: [PATCH 12/16] Remove --filesystem-encoding --- yt_dlp/options.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index ba1b55b14e..fa6ceb873e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1380,10 +1380,6 @@ def create_parser(): '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default='notrim', help='Limit the filename length (excluding extension) to the specified number of characters or bytes') - filesystem.add_option( - '--filesystem-encoding', metavar='ENCODING', - dest='filesystem_encoding', - help='Override filesystem encoding used when calculating filename length in bytes') filesystem.add_option( '-w', '--no-overwrites', action='store_false', dest='overwrites', default=None, From d482cee11dfd76329b5bfead7d5b92142c18ec0a Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Mon, 17 Feb 2025 10:22:49 -0500 Subject: [PATCH 13/16] Rename 'notrim' option to 'none' to be consistent with other options --- test/test_YoutubeDL.py | 2 +- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/__init__.py | 2 +- yt_dlp/options.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index bb7d81484b..c2325d2709 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -715,7 +715,7 @@ class TestYoutubeDL(unittest.TestCase): def test_prepare_outtmpl_and_filename(self): def test(tmpl, expected, *, info=None, **params): if 'trim_file_name' not in params: - params['trim_file_name'] = 'notrim' # disable trimming + params['trim_file_name'] = 'none' # disable trimming params['outtmpl'] = tmpl ydl = FakeYDL(params) ydl._num_downloads = 
1 diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c8d54d86a..c47ad84c72 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1439,9 +1439,9 @@ class YoutubeDL: filename = outtmpl % info_dict def parse_trim_file_name(trim_file_name): - if trim_file_name is None or trim_file_name == 'notrim': + if trim_file_name is None or trim_file_name == 'none': return 0, None - mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|notrim)', trim_file_name) + mobj = re.match(r'(?:(?P<length>\d+)(?P<mode>b|c)?|none)', trim_file_name) return int(mobj.group('length')), mobj.group('mode') or 'c' max_file_name, mode = parse_trim_file_name(self.params.get('trim_file_name')) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index e49e73fef9..3f63a6b548 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -429,7 +429,7 @@ def validate_options(opts): } # Other options - validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|notrim)') + validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|none)') if opts.playlist_items is not None: try: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index fa6ceb873e..5820bc7a10 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1378,7 +1378,7 @@ def create_parser(): help='Sanitize filenames only minimally') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', - dest='trim_file_name', default='notrim', + dest='trim_file_name', default='none', help='Limit the filename length (excluding extension) to the specified number of characters or bytes') filesystem.add_option( '-w', '--no-overwrites', From 1139bd8b1437382a69a3450a0d1b12c6a9c65587 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 2 Mar 2025 13:54:24 -0500 Subject: [PATCH 14/16] fmt --- yt_dlp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 517601c6d1..a44c1682b6 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -435,7 +435,7 @@ def 
validate_options(opts): opts.plugin_dirs = opts.plugin_dirs if opts.plugin_dirs is None: opts.plugin_dirs = ['default'] - + validate_regex('trim filenames', opts.trim_file_name, r'(?:\d+[bc]?|none)') if opts.playlist_items is not None: From e9aec455d200cfe915853694a1595dc781e59567 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Fri, 14 Mar 2025 15:12:25 -0400 Subject: [PATCH 15/16] Fix error when evaluating '.' outtmpl --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eeac6c8c0d..4669b469ea 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1476,7 +1476,7 @@ class YoutubeDL: else: return name[:max_file_name] - filename = os.path.join(*map(trim_filename, Path(filename).parts)) + filename = os.path.join(*map(trim_filename, Path(filename).parts) or '.') return filename + suffix @_catch_unsafe_extension_error From 134f604982eca7c1b95e4015a68e06732eb732e3 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Fri, 14 Mar 2025 15:13:21 -0400 Subject: [PATCH 16/16] oops --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4669b469ea..752b67c599 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1476,7 +1476,7 @@ class YoutubeDL: else: return name[:max_file_name] - filename = os.path.join(*map(trim_filename, Path(filename).parts) or '.') + filename = os.path.join(*map(trim_filename, Path(filename).parts or '.')) return filename + suffix @_catch_unsafe_extension_error