From 3e93f2ef80948a9319dcec6e935bd1cfbfb985ca Mon Sep 17 00:00:00 2001 From: jrfnl Date: Wed, 31 Jul 2024 04:19:53 +0200 Subject: [PATCH] Tokenizer/PHP: add tests for heredoc/nowdoc tokenization The PHP tokenizer contains logic to: * Retokenize the start/end tokens for nowdocs from `T_(START|END)_HEREDOC` to `T_(START|END)_NOWDOC`; * Retokenize the _contents_ of a heredoc/nowdoc to `T_HEREDOC`/`T_NOWDOC` tokens. * Retokenize the start token from `T_START_(HERE|NOW)DOC` to `T_STRING` if the heredoc/nowdoc is unclosed; * Ensure that each line in the contents has its own token. This commit adds tests safeguarding and documenting this part of the tokenizer. --- .../Core/Tokenizer/PHP/HeredocNowdocTest.inc | 39 ++++ .../Core/Tokenizer/PHP/HeredocNowdocTest.php | 213 ++++++++++++++++++ .../Tokenizer/PHP/HeredocParseErrorTest.inc | 11 + .../Tokenizer/PHP/HeredocParseErrorTest.php | 41 ++++ 4 files changed, 304 insertions(+) create mode 100644 tests/Core/Tokenizer/PHP/HeredocNowdocTest.inc create mode 100644 tests/Core/Tokenizer/PHP/HeredocNowdocTest.php create mode 100644 tests/Core/Tokenizer/PHP/HeredocParseErrorTest.inc create mode 100644 tests/Core/Tokenizer/PHP/HeredocParseErrorTest.php diff --git a/tests/Core/Tokenizer/PHP/HeredocNowdocTest.inc b/tests/Core/Tokenizer/PHP/HeredocNowdocTest.inc new file mode 100644 index 0000000000..5041dda157 --- /dev/null +++ b/tests/Core/Tokenizer/PHP/HeredocNowdocTest.inc @@ -0,0 +1,39 @@ + + * @copyright 2024 PHPCSStandards and contributors + * @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP; + +use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase; +use PHP_CodeSniffer\Util\Tokens; + +/** + * Tests the tokenization for heredoc/nowdoc constructs. + * + * Verifies that: + * - Nowdoc opener/closers are retokenized from `T_[START_|END_]HEREDOC` to `T_[START_|END_]NOWDOC`. 
+ * - The contents of the heredoc/nowdoc are tokenized as `T_HEREDOC`/`T_NOWDOC`. + * - Each line of the contents has its own token, which includes the new line char. + * + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + */ +final class HeredocNowdocTest extends AbstractTokenizerTestCase +{ + + + /** + * Verify tokenization of a single-line heredoc construct. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testHeredocSingleLine() + { + $expectedSequence = [ + [T_START_HEREDOC => '<<<EOD'."\n"], + [T_HEREDOC => 'Some $var text'."\n"], + [T_END_HEREDOC => 'EOD'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testHeredocSingleLine() + + + /** + * Verify tokenization of a single-line nowdoc construct. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testNowdocSingleLine() + { + $expectedSequence = [ + [T_START_NOWDOC => "<<<'MARKER'\n"], + [T_NOWDOC => 'Some text'."\n"], + [T_END_NOWDOC => 'MARKER'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testNowdocSingleLine() + + + /** + * Verify tokenization of a multiline heredoc construct. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testHeredocMultiLine() + { + $expectedSequence = [ + [T_START_HEREDOC => '<<<"😬"'."\n"], + [T_HEREDOC => 'Lorum ipsum'."\n"], + [T_HEREDOC => 'Some $var text'."\n"], + [T_HEREDOC => 'dolor sit amet'."\n"], + [T_END_HEREDOC => '😬'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' 
*/', T_START_HEREDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testHeredocMultiLine() + + + /** + * Verify tokenization of a multiline nowdoc construct. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testNowdocMultiLine() + { + $expectedSequence = [ + [T_START_NOWDOC => "<<<'multi_line'\n"], + [T_NOWDOC => 'Lorum ipsum'."\n"], + [T_NOWDOC => 'Some text'."\n"], + [T_NOWDOC => 'dolor sit amet'."\n"], + [T_END_NOWDOC => 'multi_line'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testNowdocMultiLine() + + + /** + * Verify tokenization of a multiline heredoc construct of which the contents end on a blank line. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testHeredocEndsOnBlankLine() + { + $expectedSequence = [ + [T_START_HEREDOC => '<<<EOD'."\n"], + [T_HEREDOC => 'Lorum ipsum'."\n"], + [T_HEREDOC => 'dolor sit amet'."\n"], + [T_HEREDOC => "\n"], + [T_END_HEREDOC => 'EOD'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testHeredocEndsOnBlankLine() + + + /** + * Verify tokenization of a multiline nowdoc construct of which the contents end on a blank line. + * + * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. + * + * @return void + */ + public function testNowdocEndsOnBlankLine() + { + $expectedSequence = [ + [T_START_NOWDOC => "<<<'EOD'\n"], + [T_NOWDOC => 'Lorum ipsum'."\n"], + [T_NOWDOC => 'dolor sit amet'."\n"], + [T_NOWDOC => "\n"], + [T_END_NOWDOC => 'EOD'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' 
*/', T_START_NOWDOC); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testNowdocEndsOnBlankLine() + + + /** + * Test helper. Check that a run of consecutive tokens matches an expected sequence on code, type and content. + * + * @param int $startPtr The position in the file to start checking from. + * @param array<array<int|string, string>> $expectedSequence The consecutive token constants and their contents to expect. + * + * @return void + */ + private function checkTokenSequence($startPtr, array $expectedSequence) + { + $tokens = $this->phpcsFile->getTokens(); + + $sequenceKey = 0; + $sequenceCount = count($expectedSequence); + + for ($i = $startPtr; $sequenceKey < $sequenceCount; $i++, $sequenceKey++) { + $currentItem = $expectedSequence[$sequenceKey]; + $expectedCode = key($currentItem); + $expectedType = Tokens::tokenName($expectedCode); + $expectedContent = current($currentItem); + $errorMsgSuffix = PHP_EOL.'(StackPtr: '.$i.' | Position in sequence: '.$sequenceKey.' | Expected: '.$expectedType.')'; + + $this->assertSame( + $expectedCode, + $tokens[$i]['code'], + 'Token tokenized as '.Tokens::tokenName($tokens[$i]['code']).', not '.$expectedType.' (code)'.$errorMsgSuffix + ); + + $this->assertSame( + $expectedType, + $tokens[$i]['type'], + 'Token tokenized as '.$tokens[$i]['type'].', not '.$expectedType.' 
(type)'.$errorMsgSuffix + ); + + $this->assertSame( + $expectedContent, + $tokens[$i]['content'], + 'Token content did not match expectations'.$errorMsgSuffix + ); + }//end for + + }//end checkTokenSequence() + + +}//end class diff --git a/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.inc b/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.inc new file mode 100644 index 0000000000..d552b12832 --- /dev/null +++ b/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.inc @@ -0,0 +1,11 @@ +>>>>>> master diff --git a/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.php b/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.php new file mode 100644 index 0000000000..7cca49aac3 --- /dev/null +++ b/tests/Core/Tokenizer/PHP/HeredocParseErrorTest.php @@ -0,0 +1,41 @@ + + * @copyright 2024 PHPCSStandards and contributors + * @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP; + +use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase; + +/** + * Tests the tokenization for an unclosed heredoc construct. + * + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + */ +final class HeredocParseErrorTest extends AbstractTokenizerTestCase +{ + + + /** + * Verify that a heredoc (and nowdoc) start token is retokenized to T_STRING if no closer is found, using a "<<< HEAD" marker as the unclosed opener. + * + * @return void + */ + public function testMergeConflict() + { + $tokens = $this->phpcsFile->getTokens(); + + $token = $this->getTargetToken('/* testUnclosedHeredoc */', [T_START_HEREDOC, T_STRING], '<<< HEAD'."\n"); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)'); + $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)'); + + }//end testMergeConflict() + + +}//end class