From dbc08fba83a0e6c5215e8cec97eb4076f85eee5f Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 6 Aug 2024 20:44:30 +0100 Subject: [PATCH 1/2] [jsinterp] Improve slice implementation for player b12cc44b Partly taken from yt-dlp/yt-dlp#10664, thx seproDev Fixes #32896 --- test/test_jsinterp.py | 28 ++++++++++++++++++++++++++++ test/test_youtube_signature.py | 4 ++++ youtube_dl/jsinterp.py | 13 ++++++++++--- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 104e766be36..c7a4f2cbf23 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -425,6 +425,34 @@ def test_split(self): self._test(jsi, [''], args=['', '-']) self._test(jsi, [], args=['', '']) + def test_slice(self): + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', []) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', []) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', []) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7]) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', []) + self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7]) + self._test('function f(){return "012345678".slice()}', '012345678') + 
self._test('function f(){return "012345678".slice(0)}', '012345678') + self._test('function f(){return "012345678".slice(5)}', '5678') + self._test('function f(){return "012345678".slice(99)}', '') + self._test('function f(){return "012345678".slice(-2)}', '78') + self._test('function f(){return "012345678".slice(-99)}', '012345678') + self._test('function f(){return "012345678".slice(0, 0)}', '') + self._test('function f(){return "012345678".slice(1, 0)}', '') + self._test('function f(){return "012345678".slice(0, 1)}', '0') + self._test('function f(){return "012345678".slice(3, 6)}', '345') + self._test('function f(){return "012345678".slice(1, -1)}', '1234567') + self._test('function f(){return "012345678".slice(-1, 1)}', '') + self._test('function f(){return "012345678".slice(-3, -1)}', '67') + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 1c5f667f57a..56e92fac5df 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -178,6 +178,10 @@ 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', ), + ( + 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', + 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 949f77775e8..a616ad070b2 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -925,9 +925,16 @@ def eval_method(variable, member): obj.reverse() return obj elif member == 'slice': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(len(argvals) == 1, 'takes exactly one argument') - return obj[argvals[0]:] + assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string') + # From [1]: + # .slice() - like [:] + # .slice(n) - like [n:] (not [slice(n)] + # .slice(m, n) - like [m:n] or [slice(m, n)] + # [1] 
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice + assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments') + if len(argvals) < 2: + argvals += (None,) + return obj[slice(*argvals)] elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') From c5098961b04ce83f4615f2a846c84f803b072639 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 6 Aug 2024 20:59:09 +0100 Subject: [PATCH 2/2] [Youtube] Rework n function extraction pattern Now also succeeds with player b12cc44b --- youtube_dl/extractor/youtube.py | 43 ++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 509e374a4d7..6fe520e9a44 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1661,23 +1661,33 @@ def _extract_n_function_name(self, jscode): # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) - # old: .get("n"))&&(b=nfunc[idx](b) - # older: .get("n"))&&(b=nfunc(b) + # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") + # old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s* + # older: (b=a.get("n"))&&(b=nfunc(b) r'''(?x) - (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?: - String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| - "n+"\[\s*\+?s*[\w$.]+\s*]| - (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\)) - )\s* - (?(b1) - &&\s*\(\s*(?P=b)| - (?: - ,(?P<c>[a-z])\s*=\s*[a-z]\s*)? 
- \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s* - &&\s*\(\s*(?(c)(?P=c)|(?P=b)) - ) - )\s*=\s* - (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) + \((?:[\w$()\s]+,)*?\s* # ( + (?P<b>[a-z])\s*=\s* # b= + (?: + (?: # expect ,c=a.get(b) (etc) + String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| + "n+"\[\s*\+?s*[\w$.]+\s*] + )\s*(?:,[\w$()\s]+(?=,))*| + (?P<old>[\w$]+) # a (old[er]) + )\s* + (?(old) + # b.get("n") + (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? + (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\)) + | # ,c=a.get(b) + ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s* + (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? + (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\)) + ) + # interstitial junk + \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)? + (?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]= + # nfunc|nfunc[idx] + (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), default=(None, None)) # thx bashonly: yt-dlp/yt-dlp/pull/10611 @@ -1690,7 +1700,6 @@ def _extract_n_function_name(self, jscode): \s*\{(?:(?!};).)+?["']enhanced_except_ ''', jscode, 'Initial JS player n function name', group='name') if not idx: - self.report_warning('Falling back to generic n function search') return func_name return self._parse_json(self._search_regex(