From e8e738406a01f2a2ca07ef348b453a49d19d1614 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 4 May 2021 21:24:00 +0530 Subject: [PATCH] Add experimental option `--check-formats` to test the URLs before format selection --- README.md | 2 + yt_dlp/YoutubeDL.py | 94 ++++++++++++++++++++++++----------- yt_dlp/__init__.py | 1 + yt_dlp/downloader/fragment.py | 6 +-- yt_dlp/options.py | 4 ++ 5 files changed, 76 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 5ae596da7..2c231f847 100644 --- a/README.md +++ b/README.md @@ -577,6 +577,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) + --check-formats Check that the formats selected are + actually downloadable (Experimental) -F, --list-formats List all available formats of requested videos --list-formats-as-table Present the output of -F in tabular form diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 79ba3ef93..c710f7554 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -84,6 +84,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + random_uuidv4, register_socks_protocols, render_table, replace_extension, @@ -1521,6 +1522,8 @@ class YoutubeDL(object): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} + check_formats = self.params.get('check_formats') + def _parse_filter(tokens): filter_parts = [] for type, string, start, _, _ in tokens: @@ -1678,6 +1681,22 @@ class YoutubeDL(object): return new_dict + def _check_formats(formats): + for f in formats: + self.to_screen('[info] Testing format %s' % f['format_id']) + paths = self.params.get('paths', {}) + temp_file = os.path.join( + expand_path(paths.get('home', '').strip()), + expand_path(paths.get('temp', '').strip()), + 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id'])) + dl, _ = self.dl(temp_file, f, test=True) + if os.path.exists(temp_file): + os.remove(temp_file) + if dl: + yield f + else: + self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + def _build_selector_function(selector): if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] @@ -1708,12 +1727,13 @@ class YoutubeDL(object): if format_spec == 'all': def selector_function(ctx): formats = list(ctx['formats']) - if formats: - for f in formats: - yield f + if check_formats: + formats = _check_formats(formats) + for f in formats: + yield f elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(ctx['formats']) + formats = list(_check_formats(ctx['formats'])) if not formats: return merged_format = formats[-1] @@ -1722,13 +1742,13 @@ class YoutubeDL(object): yield merged_format else: - format_fallback = False + format_fallback, format_reverse, format_idx = False, True, 1 mobj = re.match( r'(?Pbest|worst|b|w)(?Pvideo|audio|v|a)?(?P\*)?(?:\.(?P[1-9]\d*))?$', format_spec) if mobj is not None: format_idx = int_or_none(mobj.group('n'), default=1) - format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx + format_reverse = mobj.group('bw')[0] == 'b' format_type = (mobj.group('type') or [None])[0] not_format_type = {'v': 'a', 'a': 'v'}.get(format_type) format_modified = mobj.group('mod') is not None @@ -1743,7 +1763,6 @@ class YoutubeDL(object): if not format_modified # b, w else None) # b*, w* else: - format_idx = -1 filter_f = ((lambda f: f.get('ext') == format_spec) if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension else (lambda f: f.get('format_id') == format_spec)) # id @@ -1753,16 +1772,18 @@ class YoutubeDL(object): if not formats: return matches = list(filter(filter_f, formats)) if filter_f is not None else formats - n = len(matches) - if -n <= format_idx < n: - yield matches[format_idx] - elif format_fallback and ctx['incomplete_formats']: + if format_fallback and ctx['incomplete_formats'] and not matches: # for extractors with incomplete formats (audio only (soundcloud) # or video only (imgur)) best/worst will fallback to # best/worst {video,audio}-only format - n = len(formats) - if -n <= format_idx < n: - yield formats[format_idx] + matches = formats + if format_reverse: + matches = matches[::-1] + if check_formats: + matches = list(itertools.islice(_check_formats(matches), format_idx)) + n = len(matches) + if -n <= format_idx - 1 < n: + yield matches[format_idx - 1] elif selector.type == MERGE: # + selector_1, selector_2 = map(_build_selector_function, selector.selector) @@ -2179,6 +2200,34 @@ class YoutubeDL(object): self.post_extract(info_dict) self.to_stdout(json.dumps(info_dict, default=repr)) + def dl(self, name, info, subtitle=False, test=False): + + if test: + verbose = self.params.get('verbose') + params = { + 'test': True, + 'quiet': not verbose, + 'verbose': verbose, + 'noprogress': not verbose, + 'nopart': True, + 'skip_unavailable_fragments': False, + 'keep_fragments': False, + 'overwrites': True, + '_no_ytdl_file': True, + } + else: + params = self.params + fd = get_suitable_downloader(info, params)(self, params) + if not test: + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_screen('[debug] Invoking downloader on %r' % info.get('url')) + new_info = dict(info) + if new_info.get('http_headers') is None: + new_info['http_headers'] = self._calc_headers(new_info) + return fd.download(name, new_info, subtitle) + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -2264,17 +2313,6 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return - def dl(name, info, subtitle=False): - fd = get_suitable_downloader(info, self.params)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_screen('[debug] Invoking downloader on %r' % info.get('url')) - new_info = dict(info) - if new_info.get('http_headers') is None: - new_info['http_headers'] = self._calc_headers(new_info) - return fd.download(name, new_info, subtitle) - subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -2307,7 +2345,7 @@ class YoutubeDL(object): return else: try: - dl(sub_filename, sub_info.copy(), subtitle=True) + self.dl(sub_filename, sub_info.copy(), subtitle=True) sub_info['filepath'] = sub_filename files_to_move[sub_filename] = sub_filename_final except tuple([ExtractorError, IOError, OSError, ValueError] + network_exceptions) as err: @@ -2493,7 +2531,7 @@ class YoutubeDL(object): if not self._ensure_dir_exists(fname): return downloaded.append(fname) - partial_success, real_download = dl(fname, new_info) + partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success if merger.available and not self.params.get('allow_unplayable_formats'): @@ -2508,7 +2546,7 @@ class YoutubeDL(object): # Just a single file dl_filename = existing_file(full_filename, temp_filename) if dl_filename is None: - success, real_download = dl(temp_filename, info_dict) + success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download dl_filename = dl_filename or temp_filename diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index e6e568780..c88cca7ae 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -471,6 +471,7 @@ def _real_main(argv=None): 'format_sort_force': opts.format_sort_force, 'allow_multiple_video_streams': opts.allow_multiple_video_streams, 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, + 'check_formats': opts.check_formats, 'listformats': opts.listformats, 'listformats_table': opts.listformats_table, 'outtmpl': outtmpl, diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index fadd0dfc5..6d6d28483 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -31,6 +31,7 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished + _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download yt-dlp keeps on disk a special bookkeeping file with download state and metadata (in future such files will @@ -69,9 +70,8 @@ class FragmentFD(FileDownloader): self._prepare_frag_download(ctx) self._start_frag_download(ctx) - @staticmethod - def __do_ytdl_file(ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' + def __do_ytdl_file(self, ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx diff --git a/yt_dlp/options.py b/yt_dlp/options.py index a3fd9d122..7505e8c78 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -502,6 +502,10 @@ def parseOpts(overrideArguments=None): '--no-prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, help="Don't give any special preference to free containers (default)") + video_format.add_option( + '--check-formats', + action='store_true', dest='check_formats', default=False, + help="Check that the formats selected are actually downloadable (Experimental)") video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats',