From 5b6103e2ae045cfaf6ba8a755741edcd7372de8c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Oct 2020 21:05:55 +0530 Subject: [PATCH] Better Format Selection New format selectors: best*, worst*, bestvideo*, bestaudio*, worstvideo*, worstaudio* Added b,w,v,a as alias for best, worst, video and audio respectively Changed video format sorting to show video only files and video+audio files together. Added options: --video-multistreams, --audio-multistreams, --no-video-multistreams, --no-audio-multistreams --- README.md | 32 ++++++-- youtube_dlc/YoutubeDL.py | 128 +++++++++++++++++--------------- youtube_dlc/__init__.py | 2 + youtube_dlc/extractor/common.py | 4 +- youtube_dlc/extractor/vimeo.py | 11 +-- youtube_dlc/options.py | 16 ++++ 6 files changed, 121 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 9d40d2631..a971ac0d8 100644 --- a/README.md +++ b/README.md @@ -381,6 +381,10 @@ I will add some memorable short links to the binaries so you can download them e ## Video Format Options: -f, --format FORMAT Video format code, see the "FORMAT SELECTION" for all the info + --video-multistreams Allow multiple video streams to be merged into a single file (default) + --no-video-multistreams Only one video stream is downloaded for each output file + --audio-multistreams Allow multiple audio streams to be merged into a single file (default) + --no-audio-multistreams Only one audio stream is downloaded for each output file --all-formats Download all available video formats --prefer-free-formats Prefer free video formats unless a specific one is requested @@ -694,12 +698,23 @@ You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, You can also use special names to select particular edge case formats: - - `best`: Select the best quality format represented by a single file with video and audio. - - `worst`: Select the worst quality format represented by a single file with video and audio. - - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. - - `worstvideo`: Select the worst quality video-only format. May not be available. - - `bestaudio`: Select the best quality audio only-format. May not be available. - - `worstaudio`: Select the worst quality audio only-format. May not be available. + - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio. + - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio. + + - `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]` + - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]` + + - `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]` + - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]` + + - `bv*`, `bestvideo*`: Select the best quality format that contains video. It may also contain audio. Equivalent to `best*[vcodec!=none]` + - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]` + + - `ba`, `bestaudio`: Select the best quality audio-only format. Equivalent to `best*[vcodec=none]` + - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]` + + - `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. Equivalent to `best*[acodec!=none]` + - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]` For example, to download the worst quality video-only format you can use `-f worstvideo`. @@ -735,7 +750,7 @@ Note that none of the aforementioned meta fields are guaranteed to be present si Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. -You can merge the video and audio of two formats into a single file using `-f +` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. +You can merge the video and audio of multiple formats into a single file using `-f ++...` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. If `--no-video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, if `--no-audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`. Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`. @@ -748,6 +763,9 @@ If you want to preserve the old format selection behavior (prior to youtube-dlc Note that on Windows you may need to use double quotes instead of single. ```bash +# Download best format that contains video and if it doesn't already have an audio stream, merge it with best audio-only format +$ youtube-dlc -f 'bestvideo*+bestaudio' --no-audio-multistreams + # Download best mp4 format available or any other best if no mp4 available $ youtube-dlc -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index fc351db0d..84e642364 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -163,6 +163,8 @@ class YoutubeDL(object): (or video) as a single JSON line. simulate: Do not download the video files. format: Video format code. See options.py for more information. + allow_multiple_video_streams: Allow multiple video streams to be merged into a single file + allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names. trim_file_name: Limit length of filename (extension excluded). @@ -1167,6 +1169,9 @@ class YoutubeDL(object): GROUP = 'GROUP' FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) + allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True), + 'video': self.params.get('allow_multiple_video_streams', True)} + def _parse_filter(tokens): filter_parts = [] for type, string, start, _, _ in tokens: @@ -1265,7 +1270,7 @@ class YoutubeDL(object): return selectors def _build_selector_function(selector): - if isinstance(selector, list): + if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] def selector_function(ctx): @@ -1273,9 +1278,11 @@ class YoutubeDL(object): for format in f(ctx): yield format return selector_function - elif selector.type == GROUP: + + elif selector.type == GROUP: # () selector_function = _build_selector_function(selector.selector) - elif selector.type == PICKFIRST: + + elif selector.type == PICKFIRST: # / fs = [_build_selector_function(s) for s in selector.selector] def selector_function(ctx): @@ -1284,62 +1291,54 @@ class YoutubeDL(object): if picked_formats: return picked_formats return [] - elif selector.type == SINGLE: - format_spec = selector.selector - def selector_function(ctx): - formats = list(ctx['formats']) - if not formats: - return - if format_spec == 'all': - for f in formats: - yield f - elif format_spec in ['best', 'worst', None]: - format_idx = 0 if format_spec == 'worst' else -1 - audiovideo_formats = [ - f for f in formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - yield audiovideo_formats[format_idx] - # for extractors with incomplete formats (audio only (soundcloud) - # or video only (imgur)) we will fallback to best/worst - # {video,audio}-only format - elif ctx['incomplete_formats']: - yield formats[format_idx] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[0] + elif selector.type == SINGLE: # atom + format_spec = selector.selector if selector.selector is not None else 'best' + + if format_spec == 'all': + def selector_function(ctx): + formats = list(ctx['formats']) + if formats: + for f in formats: + yield f + + else: + format_fallback = False + format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec) + if format_spec_obj is not None: + format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1 + format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False + not_format_type = 'v' if format_type == 'a' else 'a' + format_modified = format_spec_obj.group(3) is not None + + format_fallback = not format_type and not format_modified # for b, w + filter_f = ((lambda f: f.get(format_type + 'codec') != 'none') + if format_type and format_modified # bv*, ba*, wv*, wa* + else (lambda f: f.get(not_format_type + 'codec') == 'none') + if format_type # bv, ba, wv, wa + else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') + if not format_modified # b, w + else None) # b*, w* else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, formats)) + format_idx = -1 + filter_f = ((lambda f: f.get('ext') == format_spec) + if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension + else (lambda f: f.get('format_id') == format_spec)) # id + + def selector_function(ctx): + formats = list(ctx['formats']) + if not formats: + return + matches = list(filter(filter_f, formats)) if filter_f is not None else formats if matches: - yield matches[-1] - elif selector.type == MERGE: + yield matches[format_idx] + elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']): + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) best/worst will fallback to + # best/worst {video,audio}-only format + yield formats[format_idx] + + elif selector.type == MERGE: # + def _merge(formats_pair): format_1, format_2 = formats_pair @@ -1347,6 +1346,18 @@ class YoutubeDL(object): formats_info.extend(format_1.get('requested_formats', (format_1,))) formats_info.extend(format_2.get('requested_formats', (format_2,))) + if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: + get_no_more = {"video": False, "audio": False} + for (i, fmt_info) in enumerate(formats_info): + for aud_vid in ["audio", "video"]: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] @@ -1683,6 +1694,7 @@ class YoutubeDL(object): expected=True) if download: + self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download])) if len(formats_to_download) > 1: self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) for format in formats_to_download: @@ -2270,8 +2282,8 @@ class YoutubeDL(object): [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - if len(formats) > 1: - table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' + # if len(formats) > 1: + # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)' header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index 105786bc0..1724cd04c 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -347,6 +347,8 @@ def _real_main(argv=None): 'simulate': opts.simulate or any_getting, 'skip_download': opts.skip_download, 'format': opts.format, + 'allow_multiple_video_streams': opts.allow_multiple_video_streams, + 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index 4b42d699f..da1ba79b4 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -1398,8 +1398,8 @@ class InfoExtractor(object): except ValueError: audio_ext_preference = -1 else: - if f.get('acodec') == 'none': # video only - preference -= 40 + # if f.get('acodec') == 'none': # video only + # preference -= 40 if self._downloader.params.get('prefer_free_formats'): ORDER = ['flv', 'mp4', 'webm'] else: diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 9839657ca..89bf170c4 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -181,11 +181,12 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'preference': 1, }) - for f in formats: - if f.get('vcodec') == 'none': - f['preference'] = -50 - elif f.get('acodec') == 'none': - f['preference'] = -40 + # Reduntant code! This is already done in common.py + # for f in formats: + # if f.get('vcodec') == 'none': + # f['preference'] = -50 + # elif f.get('acodec') == 'none': + # f['preference'] = -40 subtitles = {} text_tracks = config['request'].get('text_tracks') diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 1d7a7fed2..4cb307719 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -394,6 +394,22 @@ def parseOpts(overrideArguments=None): '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, help='Video format code, see the "FORMAT SELECTION" for all the info') + video_format.add_option( + '--video-multistreams', + action='store_true', dest='allow_multiple_video_streams', default=True, + help='Allow multiple video streams to be merged into a single file (default)') + video_format.add_option( + '--no-video-multistreams', + action='store_false', dest='allow_multiple_video_streams', + help='Only one video stream is downloaded for each output file') + video_format.add_option( + '--audio-multistreams', + action='store_true', dest='allow_multiple_audio_streams', default=True, + help='Allow multiple audio streams to be merged into a single file (default)') + video_format.add_option( + '--no-audio-multistreams', + action='store_false', dest='allow_multiple_audio_streams', + help='Only one audio stream is downloaded for each output file') video_format.add_option( '--all-formats', action='store_const', dest='format', const='all',