From 752cda3880f30a46bed1d27b69188ab93ad1a368 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 3 Jun 2021 23:30:38 +0530 Subject: [PATCH] Fix and refactor `prepare_outtmpl` The following tests would have failed previously: %(id)d %(id)r %(ext)s-%(ext|def)d %(width|)d %(id)r %(height)r %(formats.0)r %s --- test/test_YoutubeDL.py | 121 ++++++++++----- test/test_postprocessors.py | 14 ++ yt_dlp/YoutubeDL.py | 181 ++++++++++------------ yt_dlp/postprocessor/execafterdownload.py | 18 +-- yt_dlp/postprocessor/metadatafromfield.py | 6 +- yt_dlp/utils.py | 16 +- 6 files changed, 194 insertions(+), 162 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2b3ed8f7b..48015f98b 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -17,7 +17,7 @@ from yt_dlp.compat import compat_str, compat_urllib_error from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor -from yt_dlp.utils import ExtractorError, match_filter_func +from yt_dlp.utils import ExtractorError, float_or_none, match_filter_func TEST_URL = 'http://localhost/sample.mp4' @@ -648,56 +648,95 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(test_dict['extractor'], 'Foo') self.assertEqual(test_dict['playlist'], 'funny videos') - def test_prepare_filename(self): - info = { - 'id': '1234', - 'ext': 'mp4', - 'width': None, - 'height': 1080, - 'title1': '$PATH', - 'title2': '%PATH%', - 'timestamp': 1618488000, - 'formats': [{'id': 'id1'}, {'id': 'id2'}] - } + outtmpl_info = { + 'id': '1234', + 'ext': 'mp4', + 'width': None, + 'height': 1080, + 'title1': '$PATH', + 'title2': '%PATH%', + 'timestamp': 1618488000, + 'duration': 100000, + 'playlist_index': 1, + '_last_playlist_index': 100, + 'n_entries': 10, + 'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}] + } + + def test_prepare_outtmpl(self): + def out(tmpl, **params): + params['outtmpl'] = tmpl + ydl = YoutubeDL(params) + ydl._num_downloads = 1 + outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, self.outtmpl_info) + return outtmpl % tmpl_dict + + self.assertEqual(out('%(id)s.%(ext)s'), '1234.mp4') + self.assertEqual(out('%(duration_string)s'), '27:46:40') + self.assertTrue(float_or_none(out('%(epoch)d'))) + self.assertEqual(out('%(resolution)s'), '1080p') + self.assertEqual(out('%(playlist_index)s'), '001') + self.assertEqual(out('%(autonumber)s'), '00001') + self.assertEqual(out('%(autonumber+2)03d', autonumber_start=3), '005') + self.assertEqual(out('%(autonumber)s', autonumber_size=3), '001') + + self.assertEqual(out('%%'), '%') + self.assertEqual(out('%%%%'), '%%') + self.assertEqual(out('%(invalid@tmpl|def)s', outtmpl_na_placeholder='none'), 'none') + self.assertEqual(out('%()s'), 'NA') + self.assertEqual(out('%s'), '%s') + + NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' + self.assertEqual(out(NA_TEST_OUTTMPL), 'NA-NA-def-1234.mp4') + self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder='none'), 'none-none-def-1234.mp4') + self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder=''), '--def-1234.mp4') + + FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s' + self.assertEqual(out(FMT_TEST_OUTTMPL % 's'), '1080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % 'd'), '1080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % '6d'), ' 1080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % '-6d'), '1080 .mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % '06d'), '001080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4') + self.assertEqual(out(FMT_TEST_OUTTMPL % ' 0 6d'), ' 01080.mp4') + + self.assertEqual(out('%(id)d'), '1234') + self.assertEqual(out('%(id)d %(id)r'), "1234 '1234'") + self.assertEqual(out('%(ext)s-%(ext|def)d'), 'mp4-def') + self.assertEqual(out('%(width|0)04d'), '0000') + self.assertEqual(out('%(width|)d', outtmpl_na_placeholder='none'), '') + + FORMATS = self.outtmpl_info['formats'] + self.assertEqual(out('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20') + self.assertEqual(out('%(id+1-height+3)05d'), '00158') + self.assertEqual(out('%(width+100)05d'), 'NA') + self.assertEqual(out('%(formats.0)s'), str(FORMATS[0])) + self.assertEqual(out('%(formats.-1.id)s'), str(FORMATS[-1]['id'])) + self.assertEqual(out('%(formats.3)s'), 'NA') + self.assertEqual(out('%(formats.:2:-1)r'), repr(FORMATS[:2:-1])) + self.assertEqual(out('%(formats.0.id.-1+id)f'), '1235.000000') - def fname(templ, na_placeholder='NA'): + def test_prepare_filename(self): + def fname(templ): params = {'outtmpl': templ} - if na_placeholder != 'NA': - params['outtmpl_na_placeholder'] = na_placeholder ydl = YoutubeDL(params) - return ydl.prepare_filename(info) - self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4') - self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') - NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s' - # Replace missing fields with 'NA' by default - self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4') - # Or by provided placeholder - self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4') - self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4') - self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4') - self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') - self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') - self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') - self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') - self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') - self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') - self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') - self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') - self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + return ydl.prepare_filename(self.outtmpl_info) + self.assertEqual(fname('%%'), '%') self.assertEqual(fname('%%%%'), '%%') - self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') + self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') + self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') - self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20') - self.assertEqual(fname('%(id+1)05d'), '01235') - self.assertEqual(fname('%(width+100)05d'), 'NA') - self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}") - self.assertEqual(fname('%(formats.-1.id)s'), 'id2') - self.assertEqual(fname('%(formats.2)s'), 'NA') + + self.assertEqual(fname('%(id)r %(height)r'), "'1234' 1080") + self.assertEqual(fname('%(formats.0)r'), "{'id' - 'id1'}") def test_format_note(self): ydl = YoutubeDL() diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 1f8f375cc..bdc2d93cb 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -8,7 +8,10 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from yt_dlp import YoutubeDL +from yt_dlp.compat import compat_shlex_quote from yt_dlp.postprocessor import ( + ExecAfterDownloadPP, FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataFromTitlePP, @@ -55,3 +58,14 @@ class TestConvertThumbnail(unittest.TestCase): for _, out in tests: os.remove(file.format(out)) + + +class TestExecAfterDownload(unittest.TestCase): + def test_parse_cmd(self): + pp = ExecAfterDownloadPP(YoutubeDL(), '') + info = {'filepath': 'file name'} + quoted_filepath = compat_shlex_quote(info['filepath']) + + self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath) + self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath) + self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath']) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a09c05b41..5b9cc235e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -64,7 +64,7 @@ from .utils import ( float_or_none, format_bytes, format_field, - FORMAT_RE, + STR_FORMAT_RE, formatSeconds, GeoRestrictedError, int_or_none, @@ -815,52 +815,26 @@ class YoutubeDL(object): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)""" - template_dict = dict(info_dict) + info_dict = dict(info_dict) na = self.params.get('outtmpl_na_placeholder', 'NA') - # duration_string - template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs + info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) - - # epoch - template_dict['epoch'] = int(time.time()) - - # autonumber - autonumber_size = self.params.get('autonumber_size') - if autonumber_size is None: - autonumber_size = 5 - template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads - - # resolution if not defined - if template_dict.get('resolution') is None: - if template_dict.get('width') and template_dict.get('height'): - template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) - elif template_dict.get('height'): - template_dict['resolution'] = '%sp' % template_dict['height'] - elif template_dict.get('width'): - template_dict['resolution'] = '%dx?' % template_dict['width'] + info_dict['epoch'] = int(time.time()) + info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads + if info_dict.get('resolution') is None: + info_dict['resolution'] = self.format_resolution(info_dict, default=None) # For fields playlist_index and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')), - 'autonumber': autonumber_size, + 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), + 'autonumber': self.params.get('autonumber_size') or 5, } - FIELD_SIZE_COMPAT_RE = r'(?autonumber|playlist_index)\)s' - mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) - if mobj: - outtmpl = re.sub( - FIELD_SIZE_COMPAT_RE, - r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], - outtmpl) - - numeric_fields = list(self._NUMERIC_FIELDS) - if sanitize is None: - sanitize = lambda k, v: v - - EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P[^)]*)') + + EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*') # Field is of the form key1.key2... # where keys (except first) can be string, int or slice FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*' @@ -876,71 +850,76 @@ class YoutubeDL(object): '+': float.__add__, '-': float.__sub__, } - for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl): - final_key = outer_mobj.group('key') - str_type = outer_mobj.group('type') - value = None - mobj = re.match(INTERNAL_FORMAT_RE, final_key) - if mobj is not None: + tmpl_dict = {} + + def get_value(mdict): + # Object traversal + fields = mdict['fields'].split('.') + value = traverse_dict(info_dict, fields) + # Negative + if mdict['negate']: + value = float_or_none(value) + if value is not None: + value *= -1 + # Do maths + if mdict['maths']: + value = float_or_none(value) + operator = None + for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]: + if item == '' or value is None: + return None + if operator: + item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) + offset = float_or_none(item) + if offset is None: + offset = float_or_none(traverse_dict(info_dict, item.split('.'))) + try: + value = operator(value, multiplier * offset) + except (TypeError, ZeroDivisionError): + return None + operator = None + else: + operator = MATH_FUNCTIONS[item] + # Datetime formatting + if mdict['strf_format']: + value = strftime_or_none(value, mdict['strf_format']) + + return value + + def create_key(outer_mobj): + if not outer_mobj.group('has_key'): + return '%{}'.format(outer_mobj.group(0)) + + key = outer_mobj.group('key') + fmt = outer_mobj.group('format') + mobj = re.match(INTERNAL_FORMAT_RE, key) + if mobj is None: + value, default = None, na + else: mobj = mobj.groupdict() - # Object traversal - fields = mobj['fields'].split('.') - value = traverse_dict(template_dict, fields) - # Negative - if mobj['negate']: - value = float_or_none(value) - if value is not None: - value *= -1 - # Do maths - if mobj['maths']: - value = float_or_none(value) - operator = None - for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]: - if item == '': - value = None - if value is None: - break - if operator: - item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) - offset = float_or_none(item) - if offset is None: - offset = float_or_none(traverse_dict(template_dict, item.split('.'))) - try: - value = operator(value, multiplier * offset) - except (TypeError, ZeroDivisionError): - value = None - operator = None - else: - operator = MATH_FUNCTIONS[item] - # Datetime formatting - if mobj['strf_format']: - value = strftime_or_none(value, mobj['strf_format']) - # Set default - if value is None and mobj['default'] is not None: - value = mobj['default'] - # Sanitize - if str_type in 'crs' and value is not None: # string - value = sanitize('%{}'.format(str_type) % fields[-1], value) - else: # numeric - numeric_fields.append(final_key) + default = mobj['default'] if mobj['default'] is not None else na + value = get_value(mobj) + + if fmt == 's' and value is not None and key in field_size_compat_map.keys(): + fmt = '0{:d}d'.format(field_size_compat_map[key]) + + value = default if value is None else value + key += '\0%s' % fmt + + if fmt[-1] not in 'crs': # numeric value = float_or_none(value) - if value is not None: - template_dict[final_key] = value - - # Missing numeric fields used together with integer presentation types - # in format specification will break the argument substitution since - # string NA placeholder is returned for missing fields. We will patch - # output template for missing fields to meet string presentation type. - for numeric_field in numeric_fields: - if template_dict.get(numeric_field) is None: - outtmpl = re.sub( - FORMAT_RE.format(re.escape(numeric_field)), - r'%({0})s'.format(numeric_field), outtmpl) - - template_dict = collections.defaultdict(lambda: na, ( - (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) - for k, v in template_dict.items() if v is not None)) - return outtmpl, template_dict + if value is None: + value, fmt = default, 's' + if sanitize: + if fmt[-1] == 'r': + # If value is an object, sanitize might convert it to a string + # So we convert it to repr first + value, fmt = repr(value), '%ss' % fmt[:-1] + value = sanitize(key, value) + tmpl_dict[key] = value + return '%({key}){fmt}'.format(key=key, fmt=fmt) + + return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict def _prepare_filename(self, info_dict, tmpl_type='default'): try: @@ -966,7 +945,7 @@ class YoutubeDL(object): force_ext = OUTTMPL_TYPES.get(tmpl_type) if force_ext is not None: - filename = replace_extension(filename, force_ext, template_dict.get('ext')) + filename = replace_extension(filename, force_ext, info_dict.get('ext')) # https://github.com/blackjack4494/youtube-dlc/issues/85 trim_file_name = self.params.get('trim_file_name', False) diff --git a/yt_dlp/postprocessor/execafterdownload.py b/yt_dlp/postprocessor/execafterdownload.py index 9d68583e7..948b3ffb3 100644 --- a/yt_dlp/postprocessor/execafterdownload.py +++ b/yt_dlp/postprocessor/execafterdownload.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals -import re import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote from ..utils import ( encodeArgument, - FORMAT_RE, PostProcessingError, ) @@ -23,14 +21,14 @@ class ExecAfterDownloadPP(PostProcessor): return 'Exec' def parse_cmd(self, cmd, info): - # If no %(key)s is found, replace {} for backard compatibility - if not re.search(FORMAT_RE.format(r'[^)]*'), cmd): - if '{}' not in cmd: - cmd += ' {}' - return cmd.replace('{}', compat_shlex_quote(info['filepath'])) - - tmpl, info_copy = self._downloader.prepare_outtmpl(cmd, info) - return tmpl % info_copy + tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info) + if tmpl_dict: # if there are no replacements, tmpl_dict = {} + return tmpl % tmpl_dict + + # If no replacements are found, replace {} for backard compatibility + if '{}' not in cmd: + cmd += ' {}' + return cmd.replace('{}', compat_shlex_quote(info['filepath'])) def run(self, info): cmd = self.parse_cmd(self.exec_cmd, info) diff --git a/yt_dlp/postprocessor/metadatafromfield.py b/yt_dlp/postprocessor/metadatafromfield.py index 1def868e8..8c795586c 100644 --- a/yt_dlp/postprocessor/metadatafromfield.py +++ b/yt_dlp/postprocessor/metadatafromfield.py @@ -54,9 +54,9 @@ class MetadataFromFieldPP(PostProcessor): def run(self, info): for dictn in self._data: - tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info) - data_to_parse = tmpl % info_copy - self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl)) + tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info) + data_to_parse = tmpl % tmpl_dict + self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl'])) match = re.search(dictn['regex'], data_to_parse) if match is None: self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out'])) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index dea7d85cd..72fd8a0e7 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4393,15 +4393,17 @@ OUTTMPL_TYPES = { # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting -FORMAT_RE = r'''(?x) +STR_FORMAT_RE = r'''(?x) (?[diouxXeEfFgGcrs%]) # conversion type + (?P\((?P{0})\))? # mapping key + (?P + (?:[#0\-+ ]+)? # conversion flags (optional) + (?:\d+)? # minimum field width (optional) + (?:\.\d+)? # precision (optional) + [hlL]? # length modifier (optional) + [diouxXeEfFgGcrs] # conversion type + ) '''