From 1bb83e2d8ca6d2741b411a8a7d21604db406a76b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 4 Nov 2020 03:36:47 +0530 Subject: [PATCH] Bug fixes and flake8 --- README.md | 12 +++-- youtube_dlc/extractor/common.py | 80 ++++++++++++++++++--------------- youtube_dlc/options.py | 2 +- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index a9619071e..69559c7a2 100644 --- a/README.md +++ b/README.md @@ -712,13 +712,13 @@ You can also use special names to select particular edge case formats: - `bestaudio`: Select the best quality audio only-format. May not be available. - `worstaudio`: Select the worst quality audio only-format. May not be available. -For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended to never actually use `worst` and related options. See [sorting formats](#sorting-formats) for more details. +For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended to never actually use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details. If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download. If you want to download several formats of the same video use a comma as a separator, e.g. 
`-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. -You can merge the video and audio of multiple formats into a single file using `-f +` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. +You can merge the video and audio of multiple formats into a single file using `-f ++...` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. ## Filtering Formats @@ -768,7 +768,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo - `acodec`, `audio_codec`: Audio Codec (`opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac3` > `dts` > other > unknown) - `codec`: Equivalent to `vcodec,acodec` - `vext`, `video_ext`: Video Extension (`mp4` > `flv` > `webm` > other > unknown). If `--prefer-free-formats` is used, `webm` is prefered. - - `aext`, `audio_ext`: Video Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other > unknown). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`. + - `aext`, `audio_ext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other > unknown). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`. - `ext`, `extension`: Equivalent to `vext,aext` - `filesize`: Exact filesize, if know in advance. This will be unavailable for mu38 and DASH formats. 
- `filesize_approx`: Approximate filesize calculated the manifests @@ -783,11 +783,9 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo - `br`, `bitrate`: Equivalent to using `tbr,vbr,abr` - `samplerate`, `asr`: Audio sample rate in Hz -All fields, unless specified otherwise, are sorted in decending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the smallest resolution format. Additionally, you can suffix a prefered value for the fields, seperated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two prefered values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. +All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. 
You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `has_video`, `has_audio`, `extractor_preference`, `language_preference`, `quality` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used by youtube-dlc is: `tbr,filesize,vbr,height,width,protocol,vext,abr,aext,fps,filesize_approx,source_preference,format_id`. Note that the extractors may override this default order (currently no extractor does this), but not the user-provided order. - -If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all repects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. +The fields `has_video`, `has_audio`, `extractor_preference`, `language_preference`, `quality` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used by youtube-dlc is: `tbr,filesize,vbr,height,width,protocol,vext,abr,aext,fps,filesize_approx,source_preference,format_id`. Note that the extractors may override this default order, but not the user-provided order. **Tip**: You can use the `-v -F` to see how the formats have been sorted (worst to best). diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index 96ad258eb..746c6d635 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -1359,8 +1359,8 @@ class InfoExtractor(object): regex = r' *((?P\+)?(?P[a-zA-Z0-9_]+)((?P[~:])(?P.*?))?)? 
*$' default = ('hidden', 'has_video', 'has_audio', 'extractor', 'lang', 'quality', - 'tbr', 'filesize', 'vbr', 'height', 'width', 'protocol', 'vext', - 'abr', 'aext', 'fps', 'filesize_approx','source_preference', 'format_id') + 'tbr', 'filesize', 'vbr', 'height', 'width', 'protocol', 'vext', + 'abr', 'aext', 'fps', 'filesize_approx', 'source_preference', 'format_id') settings = { 'vcodec': {'type': 'ordered', 'regex': True, @@ -1368,11 +1368,11 @@ class InfoExtractor(object): 'acodec': {'type': 'ordered', 'regex': True, 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']}, 'protocol': {'type': 'ordered', 'regex': True, - 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']}, - 'vext': {'type': 'ordered', 'field': 'ext', + 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']}, + 'vext': {'type': 'ordered', 'field': 'video_ext', 'order': ('mp4', 'flv', 'webm', '', 'none'), # Why is flv prefered over webm??? 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, - 'aext': {'type': 'ordered', 'field': 'ext', + 'aext': {'type': 'ordered', 'field': 'audio_ext', 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), 'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')}, 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, @@ -1393,7 +1393,7 @@ class InfoExtractor(object): 'asr': {'convert': 'float_none'}, 'source_preference': {'convert': 'ignore'}, 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, - 'bitrate': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, # equivalent to using tbr? 
+ 'bitrate': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, 'filesize_estimate': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'filesize_approx')}, 'extension': {'type': 'combined', 'field': ('vext', 'aext')}, 'dimension': {'type': 'multiple', 'field': ('height', 'width'), 'function': min}, # not named as 'resolution' because such a field exists @@ -1435,7 +1435,7 @@ class InfoExtractor(object): propObj[key] = default return propObj[key] - def _resolve_field_value(self, field, value, convertNone = False): + def _resolve_field_value(self, field, value, convertNone=False): if value is None: if not convertNone: return None @@ -1455,9 +1455,9 @@ class InfoExtractor(object): order_list = order_free if order_free and self._use_free_order else self._get_field_setting(field, 'order') use_regex = self._get_field_setting(field, 'regex') list_length = len(order_list) - empty_pos = order_list.index('') if '' in order_list else list_length+1 + empty_pos = order_list.index('') if '' in order_list else list_length + 1 if use_regex and value is not None: - for (i,regex) in enumerate(order_list): + for (i, regex) in enumerate(order_list): if regex and re.match(regex, value): return list_length - i return list_length - empty_pos # not in list @@ -1472,7 +1472,7 @@ class InfoExtractor(object): def evaluate_params(self, params, sort_extractor): self._use_free_order = params.get('prefer_free_formats', False) - self._sort_user = params.get('format_sort') + self._sort_user = params.get('format_sort', []) self._sort_extractor = sort_extractor def add_item(field, reverse, closest, limit_text): @@ -1492,18 +1492,18 @@ class InfoExtractor(object): self.settings[field] = data sort_list = ( - tuple(field for field in self.default if self._get_field_setting(field, 'forced')) + \ - (tuple() if params.get('format_sort_force') - else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) + \ - tuple(self._sort_user) + 
tuple(sort_extractor) + self.default) - + tuple(field for field in self.default if self._get_field_setting(field, 'forced')) + + (tuple() if params.get('format_sort_force', False) + else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) + + tuple(self._sort_user) + tuple(sort_extractor) + self.default) + for item in sort_list: match = re.match(self.regex, item) if match is None: raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) field = match.group('field') if field is None: - continue + continue if self._get_field_setting(field, 'type') == 'alias': field = self._get_field_setting(field, 'field') reverse = match.group('reverse') is not None @@ -1517,7 +1517,7 @@ class InfoExtractor(object): fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple() limit_count = len(limits) - for i,f in enumerate(fields): + for (i, f) in enumerate(fields): add_item(f, reverse, closest, limits[i] if i < limit_count else limits[0] if has_limit and not has_multiple_limits @@ -1528,11 +1528,11 @@ class InfoExtractor(object): if self._sort_extractor: to_screen('[debug] Sort order given by extractor: %s' % ','.join(self._sort_extractor)) to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % ( - '+' if self._get_field_setting(field, 'reverse') else '', field, - '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', - self._get_field_setting(field, 'limit_text'), - self._get_field_setting(field, 'limit')) - if self._get_field_setting(field, 'limit_text') is not None else '') + '+' if self._get_field_setting(field, 'reverse') else '', field, + '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', + self._get_field_setting(field, 'limit_text'), + self._get_field_setting(field, 'limit')) + if self._get_field_setting(field, 'limit_text') is not None else 
'') for field in self._order if self._get_field_setting(field, 'visible')])) def _calculate_field_preference_from_value(self, format, field, type, value): @@ -1543,12 +1543,11 @@ class InfoExtractor(object): if type == 'extractor': maximum = self._get_field_setting(field, 'max') if value is None or (maximum is not None and value >= maximum): - value = 0 + value = 0 elif type == 'boolean': in_list = self._get_field_setting(field, 'in_list') not_in_list = self._get_field_setting(field, 'not_in_list') - value = 0 if ((in_list is None or value in in_list) and - (not_in_list is None or value not in not_in_list)) else -1 + value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 elif type == 'ordered': value = self._resolve_field_value(field, value, True) @@ -1560,8 +1559,8 @@ class InfoExtractor(object): return ((-10, 0) if value is None else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher - else (0,-abs(value-limit), value-limit if reverse else limit-value) if closest - else (0, +value, 0) if not reverse and (limit is None or value <= limit) + else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest + else (0, value, 0) if not reverse and (limit is None or value <= limit) else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit else (-1, value, 0)) @@ -1569,14 +1568,19 @@ class InfoExtractor(object): type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple get_value = lambda f: format.get(self._get_field_setting(f, 'field')) if type == 'multiple': + type = 'field' # Only 'field' is allowed in multiple for now actual_fields = self._get_field_setting(field, 'field') + def wrapped_function(values): values = tuple(filter(lambda x: x is not None, values)) - return self._get_field_setting(field, 'function')(*values) if values else None + return (self._get_field_setting(field, 
'function')(*values) if len(values) > 1 + else values[0] if values + else None) + value = wrapped_function((get_value(f) for f in actual_fields)) else: value = get_value(field) - return self._calculate_field_preference_from_value(format, field, 'field', value) # multiple only works with type 'field' + return self._calculate_field_preference_from_value(format, field, type, value) def calculate_preference(self, format): # Determine missing protocol @@ -1586,26 +1590,32 @@ class InfoExtractor(object): # Determine missing ext if not format.get('ext') and 'url' in format: format['ext'] = determine_ext(format['url']) + if format.get('vcodec') == 'none': + format['audio_ext'] = format['ext'] + format['video_ext'] = 'none' + else: + format['video_ext'] = format['ext'] + format['audio_ext'] = 'none' # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? # format['preference'] = -1000 # Determine missing bitrates if format.get('tbr') is None: - if format.get('vbr') is not None or format.get('abr') is not None: - format['tbr'] = format.get('vbr',0) + format.get('abr',0) + if format.get('vbr') is not None and format.get('abr') is not None: + format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) else: if format.get('vcodec') != "none" and format.get('vbr') is None: - format['vbr'] = format.get('tbr') - format.get('abr',0) + format['vbr'] = format.get('tbr') - format.get('abr', 0) if format.get('acodec') != "none" and format.get('abr') is None: - format['abr'] = format.get('tbr') - format.get('vbr',0) + format['abr'] = format.get('tbr') - format.get('vbr', 0) return tuple(self._calculate_field_preference(format, field) for field in self._order) - def _sort_formats(self, formats, sort_extractor = []): + def _sort_formats(self, formats, field_preference=[]): if not formats: raise ExtractorError('No video formats found') format_sort = self.FormatSort() # params and to_screen are taken from the downloader - 
format_sort.evaluate_params(self._downloader.params, sort_extractor) + format_sort.evaluate_params(self._downloader.params, field_preference) if self._downloader.params.get('verbose', False): format_sort.print_verbose_info(self._downloader.to_screen) formats.sort(key=lambda f: format_sort.calculate_preference(f)) diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 263028f49..42f56b9be 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -395,7 +395,7 @@ def parseOpts(overrideArguments=None): action='store', dest='format', metavar='FORMAT', default=None, help='Video format code, see "FORMAT SELECTION" for more details') video_format.add_option( - '-S', '--format-sort', + '-S', '--format-sort', dest='format_sort', default=[], action='callback', callback=_comma_separated_values_options_callback, type='str', help='Sort the formats by the fields given, see "Sorting Formats" for more details')