From d4a072b3890ef45b9a990ba4d4cd4f2c1b98463a Mon Sep 17 00:00:00 2001 From: Robert Hafner Date: Sun, 5 Mar 2023 17:49:56 -0600 Subject: [PATCH 1/2] Refactored regex, removed buffer usage closes #38 closes #68 closes #47 closes #73 closes #94 --- src/JShrink/Minifier.php | 125 +++++++++++++----- .../Resources/jshrink/input/regex_keywords.js | 3 + tests/Resources/jshrink/input/regex_spaces.js | 7 + .../jshrink/input/regex_with_quote.js | 1 + .../jshrink/input/regex_with_quote_real.js | 3 + .../jshrink/output/regex_keywords.js | 2 + .../Resources/jshrink/output/regex_spaces.js | 2 + .../jshrink/output/regex_with_quote.js | 1 + .../jshrink/output/regex_with_quote_real.js | 1 + 9 files changed, 114 insertions(+), 31 deletions(-) create mode 100644 tests/Resources/jshrink/input/regex_keywords.js create mode 100644 tests/Resources/jshrink/input/regex_spaces.js create mode 100644 tests/Resources/jshrink/input/regex_with_quote.js create mode 100644 tests/Resources/jshrink/input/regex_with_quote_real.js create mode 100644 tests/Resources/jshrink/output/regex_keywords.js create mode 100644 tests/Resources/jshrink/output/regex_spaces.js create mode 100644 tests/Resources/jshrink/output/regex_with_quote.js create mode 100644 tests/Resources/jshrink/output/regex_with_quote_real.js diff --git a/src/JShrink/Minifier.php b/src/JShrink/Minifier.php index 9c6ab02..c784e0e 100644 --- a/src/JShrink/Minifier.php +++ b/src/JShrink/Minifier.php @@ -74,6 +74,20 @@ class Minifier */ protected $c; + /** + * The last character appended to the output; consulted when deciding whether a slash starts a regex. + * + * @var string + */ + protected $last_char; + + /** + * Accumulates the minified javascript, which is returned once processing completes. + * + * @var string + */ + protected $output; + /** * Contains the options for the current minification process. 
* @@ -95,6 +109,9 @@ class Minifier */ protected static $defaultOptions = ['flaggedComments' => true]; + + protected static $keywords = ["delete", "do", "for", "in", "instanceof", "return", "typeof", "yield"]; + /** * Contains lock ids which are used to replace certain code patterns and * prevent them from being minified @@ -115,17 +132,11 @@ class Minifier public static function minify($js, $options = []) { try { - ob_start(); - $jshrink = new Minifier(); $js = $jshrink->lock($js); - $jshrink->minifyDirectToOutput($js, $options); - - // Sometimes there's a leading new line, so we trim that out here. - $js = ltrim(ob_get_clean()); + $js = ltrim($jshrink->minifyToString($js, $options)); $js = $jshrink->unlock($js); unset($jshrink); - return $js; } catch (\Exception $e) { if (isset($jshrink)) { @@ -134,9 +145,6 @@ public static function minify($js, $options = []) $jshrink->clean(); unset($jshrink); } - - // without this call things get weird, with partially outputted js. - ob_end_clean(); throw $e; } } @@ -148,11 +156,12 @@ public static function minify($js, $options = []) * @param string $js The raw javascript to be minified * @param array $options Various runtime options in an associative array */ - protected function minifyDirectToOutput($js, $options) + protected function minifyToString($js, $options) { $this->initialize($js, $options); $this->loop(); $this->clean(); + return $this->output; } /** @@ -177,7 +186,9 @@ protected function initialize($js, $options) // Populate "a" with a new line, "b" with the first character, before // entering the loop $this->a = "\n"; - $this->b = $this->getReal(); + $this->b = "\n"; + $this->last_char = "\n"; + $this->output = ""; } /** @@ -192,6 +203,14 @@ protected function initialize($js, $options) '[' => true, '@' => true]; + + protected function echo($char) { + echo($char); + $this->output .= $char; + $this->last_char = $char[-1]; + } + + /** * The primary action occurs here. 
This function loops through the input string, * outputting anything that's relevant and discarding anything that is not. @@ -201,10 +220,11 @@ protected function loop() while ($this->a !== false && !is_null($this->a) && $this->a !== '') { switch ($this->a) { // new lines + case "\r": case "\n": // if the next line is something that can't stand alone preserve the newline if ($this->b !== false && isset($this->noNewLineCharacters[$this->b])) { - echo $this->a; + $this->echo($this->a); $this->saveString(); break; } @@ -220,7 +240,7 @@ protected function loop() // no break case ' ': if (static::isAlphaNumeric($this->b)) { - echo $this->a; + $this->echo($this->a); } $this->saveString(); @@ -228,14 +248,15 @@ protected function loop() default: switch ($this->b) { + case "\r": case "\n": if (strpos('}])+-"\'', $this->a) !== false) { - echo $this->a; + $this->echo($this->a); $this->saveString(); break; } else { if (static::isAlphaNumeric($this->a)) { - echo $this->a; + $this->echo($this->a); $this->saveString(); } } @@ -254,7 +275,7 @@ protected function loop() continue 3; } - echo $this->a; + $this->echo($this->a); $this->saveString(); break; } @@ -263,9 +284,20 @@ protected function loop() // do reg check of doom $this->b = $this->getReal(); - if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) { - $this->saveRegex(); + if ($this->b == '/') { + $valid_tokens = "(,=:[!&|?\n"; + if (strpos($valid_tokens, $this->last_char) !== false || strpos($valid_tokens, $this->a) !== false) { + // Regex can appear unquoted after these symbols + $this->saveRegex(); + } else if ($this->endsInKeyword()) { + // This block checks for the "return" token before the slash. + $this->saveRegex(); + } } + + // if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) { + // $this->saveRegex(); + // } } } @@ -332,8 +364,25 @@ protected function getChar() */ protected function peek() { - # Pull the next character but don't push the index. - return $this->index < $this->len ? 
$this->input[$this->index] : false; + if ($this->index >= $this->len) { + return false; + } + + $char = $this->input[$this->index]; + # Convert all line endings to unix standard. + # `\r\n` converts to `\n\n` and is minified. + if ($char == "\r") { + $char = "\n"; + } + + // Normalize all whitespace except for the newline character into a + // standard space. + if ($char !== "\n" && $char < "\x20") { + return ' '; + } + + # Return the next character but don't push the index. + return $char; } /** @@ -428,17 +477,17 @@ protected function processMultiLineComments($startIndex) // If conditional comments or flagged comments are not the first thing in the script // we need to echo a and fill it with a space before moving on. if ($startIndex > 0) { - echo $this->a; + $this->echo($this->a); $this->a = " "; // If the comment started on a new line we let it stay on the new line if ($this->input[($startIndex - 1)] === "\n") { - echo "\n"; + $this->echo("\n"); } } $endPoint = ($this->index - 1) - $startIndex; - echo substr($this->input, $startIndex, $endPoint); + $this->echo(substr($this->input, $startIndex, $endPoint)); $this->c = $char; @@ -504,7 +553,7 @@ protected function saveString() $stringType = $this->a; // Echo out that starting quote - echo $this->a; + $this->echo($this->a); // Loop until the string is done // Grab the very next character and load it into a @@ -523,7 +572,7 @@ protected function saveString() // block below. case "\n": if ($stringType === '`') { - echo $this->a; + $this->echo($this->a); } else { throw new \RuntimeException('Unclosed string at position: ' . $startpos); } @@ -543,14 +592,14 @@ protected function saveString() } // echo out the escaped character and restart the loop. - echo $this->a . $this->b; + $this->echo($this->a . $this->b); break; // Since we're not dealing with any special cases we simply // output the character and continue our loop. 
default: - echo $this->a; + $this->echo($this->a); } } } @@ -563,7 +612,7 @@ protected function saveString() */ protected function saveRegex() { - echo $this->a . $this->b; + $this->echo($this->a . $this->b); while (($this->a = $this->getChar()) !== false) { if ($this->a === '/') { @@ -571,7 +620,7 @@ protected function saveRegex() } if ($this->a === '\\') { - echo $this->a; + $this->echo($this->a); $this->a = $this->getChar(); } @@ -579,7 +628,7 @@ protected function saveRegex() throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index); } - echo $this->a; + $this->echo($this->a); } $this->b = $this->getReal(); } @@ -595,6 +644,20 @@ protected static function isAlphaNumeric($char) return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char == '/'; } + /** Returns true when the output ends in a whole keyword (optionally followed by one space); "margin" does not count as "in". */ + protected function endsInKeyword() { + // Only the tail matters: 12 bytes cover the longest keyword plus one boundary character and one trailing space. + $tail = substr($this->output, -12); + foreach(static::$keywords as $keyword) { + if (preg_match('/(?:^|[^\w$])' . $keyword . ' ?\z/', $tail) === 1) { + return true; + } + } + return false; + } + + + + /** * Replace patterns in the given string and store the replacement * diff --git a/tests/Resources/jshrink/input/regex_keywords.js b/tests/Resources/jshrink/input/regex_keywords.js new file mode 100644 index 0000000..2b12a6f --- /dev/null +++ b/tests/Resources/jshrink/input/regex_keywords.js @@ -0,0 +1,3 @@ +return /'/ + +typeof /'/ diff --git a/tests/Resources/jshrink/input/regex_spaces.js b/tests/Resources/jshrink/input/regex_spaces.js new file mode 100644 index 0000000..17a3d13 --- /dev/null +++ b/tests/Resources/jshrink/input/regex_spaces.js @@ -0,0 +1,7 @@ +function airplaneIsCarrierBased (model) { + return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test( + model + ) +} + +console.log(airplaneIsCarrierBased('F6F-5 Hellcat')) diff --git a/tests/Resources/jshrink/input/regex_with_quote.js b/tests/Resources/jshrink/input/regex_with_quote.js new file mode 100644 index 0000000..ce2ef0e --- /dev/null +++ 
b/tests/Resources/jshrink/input/regex_with_quote.js @@ -0,0 +1 @@ +/^(")$/ \ No newline at end of file diff --git a/tests/Resources/jshrink/input/regex_with_quote_real.js b/tests/Resources/jshrink/input/regex_with_quote_real.js new file mode 100644 index 0000000..ab8ef00 --- /dev/null +++ b/tests/Resources/jshrink/input/regex_with_quote_real.js @@ -0,0 +1,3 @@ +function test (input) { + return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input) +} diff --git a/tests/Resources/jshrink/output/regex_keywords.js b/tests/Resources/jshrink/output/regex_keywords.js new file mode 100644 index 0000000..cc2a2c4 --- /dev/null +++ b/tests/Resources/jshrink/output/regex_keywords.js @@ -0,0 +1,2 @@ +return /'/ +typeof /'/ \ No newline at end of file diff --git a/tests/Resources/jshrink/output/regex_spaces.js b/tests/Resources/jshrink/output/regex_spaces.js new file mode 100644 index 0000000..c4e39cd --- /dev/null +++ b/tests/Resources/jshrink/output/regex_spaces.js @@ -0,0 +1,2 @@ +function airplaneIsCarrierBased(model){return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(model)} +console.log(airplaneIsCarrierBased('F6F-5 Hellcat')) \ No newline at end of file diff --git a/tests/Resources/jshrink/output/regex_with_quote.js b/tests/Resources/jshrink/output/regex_with_quote.js new file mode 100644 index 0000000..ce2ef0e --- /dev/null +++ b/tests/Resources/jshrink/output/regex_with_quote.js @@ -0,0 +1 @@ +/^(")$/ \ No newline at end of file diff --git a/tests/Resources/jshrink/output/regex_with_quote_real.js b/tests/Resources/jshrink/output/regex_with_quote_real.js new file mode 100644 index 0000000..15b0b48 --- /dev/null +++ b/tests/Resources/jshrink/output/regex_with_quote_real.js @@ -0,0 +1 @@ +function test(input){return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)} \ No newline at end of file From 552a46cb69fa7d9ba2d6d5e095606edbc674ad00 Mon Sep 17 00:00:00 2001 From: Robert Hafner Date: Sun, 5 Mar 2023 17:50:16 -0600 Subject: [PATCH 2/2] 
More verbose errors in tests --- phpunit.xml.dist | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 54a534b..2dba21f 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,11 +1,19 @@ - + ./src/JShrink/ - + @@ -19,5 +27,5 @@ development - +