diff --git a/README.md b/README.md index f8c99ace40..89d32d115d 100644 --- a/README.md +++ b/README.md @@ -1782,6 +1782,7 @@ The following extractors use this feature: * `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage` * `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID) * `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback. Comma seperated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY` +* `missing_pot`: Do not skip formats that require a PO Token but are missing one. Pass it as a value of the `formats` argument, e.g. `youtube:formats=missing_pot`. These formats may fail immediately or during download with HTTP Error 403. #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. 
This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0d3963116e..22c44d29a1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -214,6 +214,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'REQUIRE_PO_TOKEN': True, 'REQUIRE_JS_PLAYER': False, }, # This client now requires sign-in for every video @@ -3973,13 +3974,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN') - if not po_token and require_po_token: + if not po_token and require_po_token and 'missing_pot' in self._configuration_arg('formats'): self.report_warning( f'No PO Token provided for {client} client, ' - f'which is required for working {client} formats. ' - f'You can manually pass a PO Token for this client with ' - f'--extractor-args "youtube:po_token={client}+XXX"', - only_once=True) + f'which may be required for working {client} formats. This client will be deprioritized.', only_once=True) deprioritize_pr = True pr = initial_pr if client == 'web' else None @@ -4053,6 +4051,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or (live_status == 'post_live' and (duration or 0) > 2 * 3600)): return live_status + def _report_pot_format_skipped(self, video_id, client_name, proto): + msg = ( + f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. ' + 'They will be skipped as they may yield HTTP Error 403. ' + f'You can manually pass a PO Token for this client with --extractor-args "youtube:po_token={client_name}+XXX". ' + 'For more information, refer to https://github.com/yt-dlp/yt-dlp/wiki/Extractors#po-token-guide . ' + 'To enable these broken formats anyway, pass --extractor-args "youtube:formats=missing_pot".') + + # Only raise a warning for non-default clients, to not confuse users. 
+ # iOS HLS formats still work without PO Token, so we don't need to warn about them. + if client_name in (*self._DEFAULT_CLIENTS, *self._DEFAULT_AUTHED_CLIENTS): + self.write_debug(msg, only_once=True) + else: + self.report_warning(msg, only_once=True) + def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 PREFERRED_LANG_VALUE = 10 @@ -4179,11 +4192,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): fmt_url = update_url_query(fmt_url, {'pot': po_token}) # Clients that require PO Token return videoplayback URLs that may return 403 - is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) - if is_broken: - self.report_warning( - f'{video_id}: {client_name} client formats require a PO Token which was not provided. ' - 'They will be deprioritized as they may yield HTTP Error 403', only_once=True) + require_po_token = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) + if require_po_token and 'missing_pot' not in self._configuration_arg('formats'): + self._report_pot_format_skipped(video_id, client_name, 'https') + continue name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 @@ -4196,7 +4208,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - is_damaged and 'DAMAGED', is_broken and 'BROKEN', + is_damaged and 'DAMAGED', require_po_token and 'MISSING POT', (self.get_param('verbose') or all_formats) and short_client_name(client_name), delim=', '), # Format 22 is likely to be damaged. 
See https://github.com/yt-dlp/yt-dlp/issues/3372 @@ -4213,7 +4225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, # Strictly de-prioritize broken, damaged and 3gp formats - 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, + 'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -4271,10 +4283,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Clients that require PO Token return videoplayback URLs that may return 403 # hls does not currently require PO Token if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls': - self.report_warning( - f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. ' - 'They will be deprioritized as they may yield HTTP Error 403', only_once=True) - f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ') + if 'missing_pot' not in self._configuration_arg('formats'): + self._report_pot_format_skipped(video_id, client_name, proto) + return False + f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 if itag and all_formats: