From 71223bff39551a11b6959a3de2dd9e2f070f3c4f Mon Sep 17 00:00:00 2001 From: Aiur Adept <151766879+aiur-adept@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:18:34 -0400 Subject: [PATCH] [Youtube] Fix nsig extraction for player 20dfca59 (#32891) * dirkf's patch for nsig extraction * add generic search per yt-dlp/yt-dlp/pull/10611 - thx bashonly --------- Co-authored-by: dirkf --- test/test_youtube_signature.py | 4 ++++ youtube_dl/extractor/youtube.py | 32 ++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 5b4aa3aa0..1c5f667f5 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -174,6 +174,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', ), + ( + 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', + '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 84371ff06..509e374a4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1659,18 +1659,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = self._search_regex( # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) - # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s + # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) + # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) # old: .get("n"))&&(b=nfunc[idx](b) # older: .get("n"))&&(b=nfunc(b) r'''(?x) - (?:\(\s*(?P<b>[a-z])\s*=\s*(?: + (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?: String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| - "n+"\[\s*\+?s*[\w$.]+\s*] - )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? 
- \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* + "n+"\[\s*\+?s*[\w$.]+\s*]| + (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\)) + )\s* + (?(b1) + &&\s*\(\s*(?P=b)| + (?: + ,(?P<c>[a-z])\s*=\s*[a-z]\s*)? + \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s* + &&\s*\(\s*(?(c)(?P=c)|(?P=b)) + ) + )\s*=\s* (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) - ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) + ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), + default=(None, None)) + # thx bashonly: yt-dlp/yt-dlp/pull/10611 + if not func_name: + self.report_warning('Falling back to generic n function search') + return self._search_regex( + r'''(?xs) + (?:(?<=[^\w$])|^) # instead of \b, which ignores $ + (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\) + \s*\{(?:(?!};).)+?["']enhanced_except_ + ''', jscode, 'Initial JS player n function name', group='name') if not idx: + self.report_warning('Falling back to generic n function search') return func_name return self._parse_json(self._search_regex(