From b505e8517ad2ca8e07d5f9577dfd9a96165beaa0 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 1 Sep 2022 13:38:25 +0530
Subject: [PATCH] [extractor/youtube] Fallback regex for nsig code extraction

---
 yt_dlp/extractor/youtube.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index b1eda0d07..9303557f7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2661,7 +2661,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot decrypt nsig without player_url')
         player_url = urljoin('https://www.youtube.com', player_url)
 
-        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        try:
+            jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as e:
+            raise ExtractorError('Unable to extract nsig function code', cause=e)
         if self.get_param('youtube_print_sig_code'):
             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
 
@@ -2706,7 +2709,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if func_code:
             return jsi, player_id, func_code
 
-        func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
+        func_name = self._extract_n_function_name(jscode)
+
+        # For redundancy
+        func_code = self._search_regex(
+            r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+                     # NB: The end of the regex is intentionally kept strict
+                     {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+            jscode, 'nsig function', group=('var', 'code'), default=None)
+        if func_code:
+            func_code = ([func_code[0]], func_code[1])
+        else:
+            self.write_debug('Extracting nsig function with jsinterp')
+            func_code = jsi.extract_function_code(func_name)
+
         self.cache.store('youtube-nsig', player_id, func_code)
         return jsi, player_id, func_code
 