From e40b8204636f8018cb13810127c2962c87381bdc Mon Sep 17 00:00:00 2001 From: Rob Lourens Date: Thu, 10 Aug 2017 11:26:27 -0700 Subject: [PATCH] Parse template string variables, and the basic expressions that are allowed --- src/Parser.php | 68 +++++++++++ src/PhpTokenizer.php | 2 +- src/TokenKind.php | 1 - .../cases/lexical/stringLiteral27.php.tokens | 2 +- tests/cases/lexical/stringLiteral29.php | 2 + .../cases/lexical/stringLiteral29.php.tokens | 34 ++++++ tests/cases/parser/stringLiteral1.php | 3 + tests/cases/parser/stringLiteral1.php.tree | 56 +++++++++ tests/cases/parser/stringLiteral10.php | 3 + tests/cases/parser/stringLiteral10.php.tree | 77 ++++++++++++ tests/cases/parser/stringLiteral11.php | 3 + tests/cases/parser/stringLiteral11.php.tree | 86 +++++++++++++ tests/cases/parser/stringLiteral2.php | 3 + tests/cases/parser/stringLiteral2.php.tree | 73 +++++++++++ tests/cases/parser/stringLiteral3.php | 3 + tests/cases/parser/stringLiteral3.php.tree | 86 +++++++++++++ tests/cases/parser/stringLiteral4.php | 3 + tests/cases/parser/stringLiteral4.php.tree | 81 +++++++++++++ tests/cases/parser/stringLiteral5.php | 3 + tests/cases/parser/stringLiteral5.php.tree | 113 ++++++++++++++++++ tests/cases/parser/stringLiteral6.php | 3 + tests/cases/parser/stringLiteral6.php.tree | 81 +++++++++++++ tests/cases/parser/stringLiteral7.php | 3 + tests/cases/parser/stringLiteral7.php.tree | 113 ++++++++++++++++++ tests/cases/parser/stringLiteral8.php | 3 + tests/cases/parser/stringLiteral8.php.tree | 89 ++++++++++++++ tests/cases/parser/stringLiteral9.php | 3 + tests/cases/parser/stringLiteral9.php.tree | 97 +++++++++++++++ 28 files changed, 1091 insertions(+), 3 deletions(-) create mode 100644 tests/cases/lexical/stringLiteral29.php create mode 100644 tests/cases/lexical/stringLiteral29.php.tokens create mode 100644 tests/cases/parser/stringLiteral1.php create mode 100644 tests/cases/parser/stringLiteral1.php.tree create mode 100644 tests/cases/parser/stringLiteral10.php create mode 100644 tests/cases/parser/stringLiteral10.php.tree create mode 100644 tests/cases/parser/stringLiteral11.php create mode 100644 tests/cases/parser/stringLiteral11.php.tree create mode 100644 tests/cases/parser/stringLiteral2.php create mode 100644 tests/cases/parser/stringLiteral2.php.tree create mode 100644 tests/cases/parser/stringLiteral3.php create mode 100644 tests/cases/parser/stringLiteral3.php.tree create mode 100644 tests/cases/parser/stringLiteral4.php create mode 100644 tests/cases/parser/stringLiteral4.php.tree create mode 100644 tests/cases/parser/stringLiteral5.php create mode 100644 tests/cases/parser/stringLiteral5.php.tree create mode 100644 tests/cases/parser/stringLiteral6.php create mode 100644 tests/cases/parser/stringLiteral6.php.tree create mode 100644 tests/cases/parser/stringLiteral7.php create mode 100644 tests/cases/parser/stringLiteral7.php.tree create mode 100644 tests/cases/parser/stringLiteral8.php create mode 100644 tests/cases/parser/stringLiteral8.php.tree create mode 100644 tests/cases/parser/stringLiteral9.php create mode 100644 tests/cases/parser/stringLiteral9.php.tree diff --git a/src/Parser.php b/src/Parser.php index ee10082..70f9ea0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -1015,6 +1015,9 @@ class Parser { case TokenKind::HeredocEnd: $expression->endQuote = $this->eat($startQuoteKind, TokenKind::HeredocEnd); return $expression; + case TokenKind::VariableName: + $expression->children[] = $this->parseTemplateStringExpression($expression); + continue; default: $expression->children[] = $this->getCurrentToken(); $this->advanceToken(); @@ -1025,6 +1028,71 @@ class Parser { return $expression; } + /** + * Double-quoted and heredoc strings support a basic set of expression types, described in http://php.net/manual/en/language.types.string.php#language.types.string.parsing + * Supported: $x, $x->p, $x[0], $x[$y] + * Not supported: $x->p1->p2, $x[0][1], etc. + * Since there is a relatively small finite set of allowed forms, I implement it here rather than trying to reuse the general expression parsing code. + */ + private function parseTemplateStringExpression($parentNode) { + $token = $this->getCurrentToken(); + if ($token->kind === TokenKind::VariableName) { + $var = $this->parseSimpleVariable($parentNode); + $token = $this->getCurrentToken(); + if ($token->kind === TokenKind::OpenBracketToken) { + return $this->parseTemplateStringSubscriptExpression($var); + } else if ($token->kind === TokenKind::ArrowToken) { + return $this->parseTemplateStringMemberAccessExpression($var); + } else { + return $var; + } + } + + return null; + } + + private function parseTemplateStringSubscriptExpression($postfixExpression) : SubscriptExpression { + $subscriptExpression = new SubscriptExpression(); + $subscriptExpression->parent = $postfixExpression->parent; + $postfixExpression->parent = $subscriptExpression; + + $subscriptExpression->postfixExpression = $postfixExpression; + $subscriptExpression->openBracketOrBrace = $this->eat(TokenKind::OpenBracketToken); // Only [] syntax is supported, not {} + $token = $this->getCurrentToken(); + if ($token->kind === TokenKind::VariableName) { + $subscriptExpression->accessExpression = $this->parseSimpleVariable($subscriptExpression); + } elseif ($token->kind === TokenKind::IntegerLiteralToken) { + $subscriptExpression->accessExpression = $this->parseNumericLiteralExpression($subscriptExpression); + } elseif ($token->kind === TokenKind::Name) { + $subscriptExpression->accessExpression = $this->parseTemplateStringSubscriptStringLiteral($subscriptExpression); + } else { + $subscriptExpression->accessExpression = new MissingToken(TokenKind::Expression, $token->fullStart); + } + + $subscriptExpression->closeBracketOrBrace = $this->eat(TokenKind::CloseBracketToken); + + return $subscriptExpression; + } + + private function parseTemplateStringSubscriptStringLiteral($parentNode) : StringLiteral { + $expression = new StringLiteral(); + $expression->parent = $parentNode; + $expression->children = $this->eat(TokenKind::Name); + return $expression; + } + + private function parseTemplateStringMemberAccessExpression($expression) : MemberAccessExpression { + $memberAccessExpression = new MemberAccessExpression(); + $memberAccessExpression->parent = $expression->parent; + $expression->parent = $memberAccessExpression; + + $memberAccessExpression->dereferencableExpression = $expression; + $memberAccessExpression->arrowToken = $this->eat(TokenKind::ArrowToken); + $memberAccessExpression->memberName = $this->eat(TokenKind::Name); + + return $memberAccessExpression; + } + private function parseNumericLiteralExpression($parentNode) { $numericLiteral = new NumericLiteral(); $numericLiteral->parent = $parentNode; diff --git a/src/PhpTokenizer.php b/src/PhpTokenizer.php index 3752247..06ef2ae 100644 --- a/src/PhpTokenizer.php +++ b/src/PhpTokenizer.php @@ -295,7 +295,7 @@ class PhpTokenizer implements TokenStreamProviderInterface { T_STRING_VARNAME => TokenKind::StringVarname, T_COMMENT => TokenKind::CommentToken, T_DOC_COMMENT => TokenKind::DocCommentToken, - T_NUM_STRING => TokenKind::NumStringToken + T_NUM_STRING => TokenKind::IntegerLiteralToken ]; const PARSE_CONTEXT_TO_PREFIX = [ diff --git a/src/TokenKind.php b/src/TokenKind.php index f1425ec..8f9e687 100644 --- a/src/TokenKind.php +++ b/src/TokenKind.php @@ -214,7 +214,6 @@ class TokenKind { const IntegerLiteralToken = 416; const CommentToken = 417; const DocCommentToken = 418; - const NumStringToken = 419; // TODO type annotations - PHP7 } diff --git a/tests/cases/lexical/stringLiteral27.php.tokens b/tests/cases/lexical/stringLiteral27.php.tokens index 97fee01..ed8fe39 100644 --- a/tests/cases/lexical/stringLiteral27.php.tokens +++ b/tests/cases/lexical/stringLiteral27.php.tokens @@ -16,7 +16,7 @@ "textLength": 1 }, { - "kind": "NumStringToken", + "kind": "IntegerLiteralToken", "textLength": 1 }, { diff --git a/tests/cases/lexical/stringLiteral29.php b/tests/cases/lexical/stringLiteral29.php new file mode 100644 index 0000000..208dd77 --- /dev/null +++ b/tests/cases/lexical/stringLiteral29.php @@ -0,0 +1,2 @@ +"; \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral10.php.tree b/tests/cases/parser/stringLiteral10.php.tree new file mode 100644 index 0000000..e8712b6 --- /dev/null +++ b/tests/cases/parser/stringLiteral10.php.tree @@ -0,0 +1,77 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "AssignmentExpression": { + "leftOperand": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "operator": { + "kind": "EqualsToken", + "textLength": 1 + }, + "byRef": null, + "rightOperand": { + "StringLiteral": { + "startQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + }, + "children": [ + { + "kind": "EncapsedAndWhitespace", + "textLength": 4 + }, + { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 4 + } + } + }, + { + "kind": "EncapsedAndWhitespace", + "textLength": 2 + } + ], + "endQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + } + } + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral11.php b/tests/cases/parser/stringLiteral11.php new file mode 100644 index 0000000..2d0bd99 --- /dev/null +++ b/tests/cases/parser/stringLiteral11.php @@ -0,0 +1,3 @@ +bar[0]}"; \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral5.php.tree b/tests/cases/parser/stringLiteral5.php.tree new file mode 100644 index 0000000..05986de --- /dev/null +++ b/tests/cases/parser/stringLiteral5.php.tree @@ -0,0 +1,113 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "AssignmentExpression": { + "leftOperand": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "operator": { + "kind": "EqualsToken", + "textLength": 1 + }, + "byRef": null, + "rightOperand": { + "StringLiteral": { + "startQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + }, + "children": [ + { + "kind": "EncapsedAndWhitespace", + "textLength": 5 + }, + { + "kind": "DollarOpenBraceToken", + "textLength": 2 + }, + { + "SubscriptExpression": { + "postfixExpression": { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 4 + } + } + }, + "arrowToken": { + "kind": "ArrowToken", + "textLength": 2 + }, + "memberName": { + "kind": "Name", + "textLength": 3 + } + } + }, + "openBracketOrBrace": { + "kind": "OpenBracketToken", + "textLength": 1 + }, + "accessExpression": { + "NumericLiteral": { + "children": { + "kind": "IntegerLiteralToken", + "textLength": 1 + } + } + }, + "closeBracketOrBrace": { + "kind": "CloseBracketToken", + "textLength": 1 + } + } + }, + { + "kind": "CloseBraceToken", + "textLength": 1 + } + ], + "endQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + } + } + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral6.php b/tests/cases/parser/stringLiteral6.php new file mode 100644 index 0000000..fd4d522 --- /dev/null +++ b/tests/cases/parser/stringLiteral6.php @@ -0,0 +1,3 @@ +bar[0]}"; \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral7.php.tree b/tests/cases/parser/stringLiteral7.php.tree new file mode 100644 index 0000000..55fe707 --- /dev/null +++ b/tests/cases/parser/stringLiteral7.php.tree @@ -0,0 +1,113 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "AssignmentExpression": { + "leftOperand": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "operator": { + "kind": "EqualsToken", + "textLength": 1 + }, + "byRef": null, + "rightOperand": { + "StringLiteral": { + "startQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + }, + "children": [ + { + "kind": "EncapsedAndWhitespace", + "textLength": 5 + }, + { + "kind": "OpenBraceDollarToken", + "textLength": 1 + }, + { + "SubscriptExpression": { + "postfixExpression": { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 4 + } + } + }, + "arrowToken": { + "kind": "ArrowToken", + "textLength": 2 + }, + "memberName": { + "kind": "Name", + "textLength": 3 + } + } + }, + "openBracketOrBrace": { + "kind": "OpenBracketToken", + "textLength": 1 + }, + "accessExpression": { + "NumericLiteral": { + "children": { + "kind": "IntegerLiteralToken", + "textLength": 1 + } + } + }, + "closeBracketOrBrace": { + "kind": "CloseBracketToken", + "textLength": 1 + } + } + }, + { + "kind": "CloseBraceToken", + "textLength": 1 + } + ], + "endQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + } + } + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral8.php b/tests/cases/parser/stringLiteral8.php new file mode 100644 index 0000000..af4802a --- /dev/null +++ b/tests/cases/parser/stringLiteral8.php @@ -0,0 +1,3 @@ +bar def"; \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral8.php.tree b/tests/cases/parser/stringLiteral8.php.tree new file mode 100644 index 0000000..29c34f1 --- /dev/null +++ b/tests/cases/parser/stringLiteral8.php.tree @@ -0,0 +1,89 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "AssignmentExpression": { + "leftOperand": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "operator": { + "kind": "EqualsToken", + "textLength": 1 + }, + "byRef": null, + "rightOperand": { + "StringLiteral": { + "startQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + }, + "children": [ + { + "kind": "EncapsedAndWhitespace", + "textLength": 4 + }, + { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 4 + } + } + }, + "arrowToken": { + "kind": "ArrowToken", + "textLength": 2 + }, + "memberName": { + "kind": "Name", + "textLength": 3 + } + } + }, + { + "kind": "EncapsedAndWhitespace", + "textLength": 4 + } + ], + "endQuote": { + "kind": "DoubleQuoteToken", + "textLength": 1 + } + } + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git a/tests/cases/parser/stringLiteral9.php b/tests/cases/parser/stringLiteral9.php new file mode 100644 index 0000000..400b7fa --- /dev/null +++ b/tests/cases/parser/stringLiteral9.php @@ -0,0 +1,3 @@ +