#88 Add API for `getDocCommentText`, `getFirst*Node`
- reuse PhpTokenizer rather than constructing a separate regex because we'll need this sort of context-specific re-tokenizing functionality for incremental parsing anyway. - functional, but still a work-in-progress, just like the rest of the API :) Also moved the hand-spun lexer to the experiments folder.
This commit is contained in:
Родитель
143c44a4d6
Коммит
7a58ceaeb8
51
src/Node.php
51
src/Node.php
|
@ -77,6 +77,42 @@ class Node implements \JsonSerializable {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the first node yielded by getChildNodes() that is an instance of
 * at least one of the given classes.
 *
 * Children are visited in iteration order; for each child the class names
 * are tried in the order they were passed, and the first hit is returned
 * immediately.
 *
 * @param string ...$classNames class names to match against (e.g. `Foo::class`)
 * @return Node|null the first matching child, or null when nothing matches
 *                   (which is always the case when no class names are given)
 */
public function getFirstChildNode(...$classNames) {
    foreach ($this->getChildNodes() as $candidate) {
        foreach ($classNames as $wantedClass) {
            if (!($candidate instanceof $wantedClass)) {
                continue;
            }
            return $candidate;
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Finds the first node yielded by getDescendantNodes() that is an instance
 * of at least one of the given classes.
 *
 * Descendants are visited in iteration order; for each descendant the class
 * names are tried in the order they were passed, and the first hit is
 * returned immediately.
 *
 * @param string ...$classNames class names to match against (e.g. `Foo::class`)
 * @return Node|null the first matching descendant, or null when nothing matches
 *                   (which is always the case when no class names are given)
 */
public function getFirstDescendantNode(...$classNames) {
    foreach ($this->getDescendantNodes() as $candidate) {
        foreach ($classNames as $wantedClass) {
            if (!($candidate instanceof $wantedClass)) {
                continue;
            }
            return $candidate;
        }
    }

    return null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets root of the syntax tree (returns self if has no parents)
|
* Gets root of the syntax tree (returns self if has no parents)
|
||||||
* @return Node
|
* @return Node
|
||||||
|
@ -349,6 +385,7 @@ class Node implements \JsonSerializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns the `fileContents` property stored on the root node of the syntax tree.
 *
 * The `&` in the signature makes this a return-by-reference function.
 *
 * @return string
 */
public function & getFileContents() : string {
    // TODO consider renaming to getSourceText
    return $this->getRoot()->fileContents;
}
|
||||||
|
|
||||||
|
@ -369,6 +406,20 @@ class Node implements \JsonSerializable {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the text of the doc comment closest to this node, if any.
 *
 * The node's leading comment/whitespace text is re-tokenized with
 * `$treatCommentsAsTrivia` set to false so that comments come back as
 * tokens of their own. The resulting tokens are then scanned from last to
 * first, so when several doc comments precede the node the last one wins.
 *
 * @return string|null text of the last DocCommentToken in the leading trivia,
 *                     or null when there is none
 */
public function getDocCommentText() {
    $triviaTokens = PhpTokenizer::getTokensArrayFromContent(
        $this->getLeadingCommentAndWhitespaceText(),
        ParseContext::SourceElements,
        $this->getFullStart(),
        false
    );

    // Walk backwards: the doc comment nearest to the node takes precedence.
    foreach (\array_reverse($triviaTokens) as $candidate) {
        if ($candidate->kind !== TokenKind::DocCommentToken) {
            continue;
        }
        return $candidate->getText($this->getFileContents());
    }

    return null;
}
|
||||||
|
|
||||||
/**
 * String conversion hook: delegates to getText().
 *
 * @return string whatever getText() returns (getText() is defined outside this view)
 */
public function __toString() {
    return $this->getText();
}
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
<?php
|
||||||
|
/*---------------------------------------------------------------------------------------------
|
||||||
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||||
|
*--------------------------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Microsoft\PhpParser;
|
||||||
|
|
||||||
|
/**
 * Integer identifiers for parse contexts.
 *
 * The identifiers are consecutive, starting at 0 (SourceElements) and ending
 * at 11 (TraitMembers). PhpTokenizer uses SourceElements as a key into its
 * PARSE_CONTEXT_TO_PREFIX table.
 *
 * NOTE(review): each constant presumably names the kind of element list the
 * parser is currently inside - confirm against the parser, which is not
 * visible here.
 */
class ParseContext {
    const SourceElements = 0;
    const BlockStatements = 1;
    const ClassMembers = 2;
    const IfClause2Elements = 3;
    const SwitchStatementElements = 4;
    const CaseStatementElements = 5;
    const WhileStatementElements = 6;
    const ForStatementElements = 7;
    const ForeachStatementElements = 8;
    const DeclareStatementElements = 9;
    const InterfaceMembers = 10;
    const TraitMembers = 11;
    // One greater than the highest identifier above; presumably the number
    // of contexts - keep in sync when adding a constant (TODO confirm usages).
    const Count = 12;
}
|
|
@ -2927,20 +2927,4 @@ class Associativity {
|
||||||
const None = 0;
|
const None = 0;
|
||||||
const Left = 1;
|
const Left = 1;
|
||||||
const Right = 2;
|
const Right = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Integer identifiers for parse contexts, numbered consecutively from 0
 * (SourceElements) to 11 (TraitMembers).
 *
 * NOTE(review): each constant presumably names the kind of element list the
 * parser is currently inside - confirm against the parser code.
 */
class ParseContext {
    const SourceElements = 0;
    const BlockStatements = 1;
    const ClassMembers = 2;
    const IfClause2Elements = 3;
    const SwitchStatementElements = 4;
    const CaseStatementElements = 5;
    const WhileStatementElements = 6;
    const ForStatementElements = 7;
    const ForeachStatementElements = 8;
    const DeclareStatementElements = 9;
    const InterfaceMembers = 10;
    const TraitMembers = 11;
    // One greater than the highest identifier above; presumably the number
    // of contexts (TODO confirm usages).
    const Count = 12;
}
|
|
|
@ -6,18 +6,24 @@
|
||||||
|
|
||||||
namespace Microsoft\PhpParser;
|
namespace Microsoft\PhpParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tokenizes content using PHP's built-in `token_get_all`, and converts to "lightweight" Token representation.
|
||||||
|
*
|
||||||
|
* Initially we tried hand-spinning the lexer (see `experiments/Lexer.php`), but we had difficulties optimizing
|
||||||
|
* performance (especially when working with Unicode characters.)
|
||||||
|
*
|
||||||
|
* Class PhpTokenizer
|
||||||
|
* @package Microsoft\PhpParser
|
||||||
|
*/
|
||||||
class PhpTokenizer implements ITokenStreamProvider {
|
class PhpTokenizer implements ITokenStreamProvider {
|
||||||
public $pos;
|
public $pos;
|
||||||
public $endOfFilePos;
|
public $endOfFilePos;
|
||||||
private $token;
|
|
||||||
|
|
||||||
public $inScriptSection = false;
|
|
||||||
|
|
||||||
private $tokensArray;
|
private $tokensArray;
|
||||||
|
|
||||||
/**
 * Tokenizes the given content eagerly and positions the stream at the
 * first token.
 *
 * @param string $content source text handed to getTokensArrayFromContent()
 */
public function __construct($content) {
    // getTokensArrayFromContent() is declared static, so invoke it as a
    // static call. `static::` (rather than `self::`) keeps the runtime-class
    // resolution that the previous `$this->` call had.
    $this->tokensArray = static::getTokensArrayFromContent($content);

    // The tokenizer appends an EndOfFileToken as the last array element, so
    // the end-of-file position is the last index.
    $this->endOfFilePos = \count($this->tokensArray) - 1;
    $this->pos = 0;
}
|
||||||
|
|
||||||
|
@ -43,11 +49,19 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
return $this->tokensArray;
|
return $this->tokensArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function initialize($tokens) {
|
public static function getTokensArrayFromContent(
|
||||||
|
$content, $parseContext = null, $initialPos = 0, $treatCommentsAsTrivia = true
|
||||||
|
) : array {
|
||||||
|
if ($parseContext !== null) {
|
||||||
|
$prefix = self::PARSE_CONTEXT_TO_PREFIX[$parseContext];
|
||||||
|
$content = $prefix . $content;
|
||||||
|
$passedPrefix = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
$tokens = \token_get_all($content);
|
||||||
|
|
||||||
$arr = array();
|
$arr = array();
|
||||||
$fullStart = 0;
|
$fullStart = $start = $pos = $initialPos;
|
||||||
$start = 0;
|
|
||||||
$pos = 0;
|
|
||||||
|
|
||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
if (\is_array($token)) {
|
if (\is_array($token)) {
|
||||||
|
@ -60,6 +74,14 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
|
|
||||||
$pos += $strlen;
|
$pos += $strlen;
|
||||||
|
|
||||||
|
if ($parseContext !== null && !$passedPrefix) {
|
||||||
|
$passedPrefix = \count($prefix) < $pos;
|
||||||
|
if ($passedPrefix) {
|
||||||
|
$fullStart = $start = $pos = $initialPos;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
switch ($tokenKind) {
|
switch ($tokenKind) {
|
||||||
case T_OPEN_TAG:
|
case T_OPEN_TAG:
|
||||||
$arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
|
$arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
|
||||||
|
@ -67,8 +89,6 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case T_WHITESPACE:
|
case T_WHITESPACE:
|
||||||
case T_COMMENT:
|
|
||||||
case T_DOC_COMMENT:
|
|
||||||
$start += $strlen;
|
$start += $strlen;
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -82,6 +102,11 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
if (($tokenKind === T_COMMENT || $tokenKind === T_DOC_COMMENT) && $treatCommentsAsTrivia) {
|
||||||
|
$start += $strlen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
$newTokenKind = isset(self::TOKEN_MAP[$tokenKind])
|
$newTokenKind = isset(self::TOKEN_MAP[$tokenKind])
|
||||||
? self::TOKEN_MAP[$tokenKind]
|
? self::TOKEN_MAP[$tokenKind]
|
||||||
: $newTokenKind = TokenKind::Unknown;
|
: $newTokenKind = TokenKind::Unknown;
|
||||||
|
@ -92,8 +117,7 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
$arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
|
$arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
|
||||||
$this->tokensArray = $arr;
|
return $arr;
|
||||||
$this->endOfFilePos = \count($arr) - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOKEN_MAP = [
|
const TOKEN_MAP = [
|
||||||
|
@ -267,6 +291,12 @@ class PhpTokenizer implements ITokenStreamProvider {
|
||||||
T_UNSET_CAST => TokenKind::UnsetCastToken,
|
T_UNSET_CAST => TokenKind::UnsetCastToken,
|
||||||
T_START_HEREDOC => TokenKind::HeredocStart,
|
T_START_HEREDOC => TokenKind::HeredocStart,
|
||||||
T_END_HEREDOC => TokenKind::HeredocEnd,
|
T_END_HEREDOC => TokenKind::HeredocEnd,
|
||||||
T_STRING_VARNAME => TokenKind::VariableName
|
T_STRING_VARNAME => TokenKind::VariableName,
|
||||||
|
T_COMMENT => TokenKind::CommentToken,
|
||||||
|
T_DOC_COMMENT => TokenKind::DocCommentToken
|
||||||
|
];
|
||||||
|
|
||||||
|
const PARSE_CONTEXT_TO_PREFIX = [
|
||||||
|
ParseContext::SourceElements => "<?php "
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
|
@ -212,6 +212,8 @@ class TokenKind {
|
||||||
const BoolCastToken = 414;
|
const BoolCastToken = 414;
|
||||||
const ArrayCastToken = 415;
|
const ArrayCastToken = 415;
|
||||||
const IntegerLiteralToken = 416;
|
const IntegerLiteralToken = 416;
|
||||||
|
const CommentToken = 417;
|
||||||
|
const DocCommentToken = 418;
|
||||||
|
|
||||||
// TODO type annotations - PHP7
|
// TODO type annotations - PHP7
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,18 +4,12 @@
|
||||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||||
*--------------------------------------------------------------------------------------------*/
|
*--------------------------------------------------------------------------------------------*/
|
||||||
|
|
||||||
// TODO autoload classes
|
|
||||||
require_once(__DIR__ . "/../src/TokenStreamProviderFactory.php");
|
|
||||||
require_once(__DIR__ . "/../src/Parser.php");
|
|
||||||
require_once(__DIR__ . "/../src/Token.php");
|
|
||||||
|
|
||||||
use Microsoft\PhpParser\Node;
|
|
||||||
use Microsoft\PhpParser\Node\SourceFileNode;
|
use Microsoft\PhpParser\Node\SourceFileNode;
|
||||||
|
use Microsoft\PhpParser\Node\Statement\FunctionDeclaration;
|
||||||
use Microsoft\PhpParser\Node\Statement\IfStatementNode;
|
use Microsoft\PhpParser\Node\Statement\IfStatementNode;
|
||||||
use Microsoft\PhpParser\Node\Statement\NamespaceDefinition;
|
use Microsoft\PhpParser\Node\Statement\NamespaceDefinition;
|
||||||
use Microsoft\PhpParser\Parser;
|
use Microsoft\PhpParser\Parser;
|
||||||
use PHPUnit\Framework\TestCase;
|
use PHPUnit\Framework\TestCase;
|
||||||
use Microsoft\PhpParser\TokenKind;
|
|
||||||
|
|
||||||
class NodeApiTest extends TestCase {
|
class NodeApiTest extends TestCase {
|
||||||
const FILENAME_PATTERN = __dir__ . "/cases/{parser,}/*.php";
|
const FILENAME_PATTERN = __dir__ . "/cases/{parser,}/*.php";
|
||||||
|
@ -38,7 +32,7 @@ PHP;
|
||||||
|
|
||||||
/**
 * Checks that looking up a descendant by position on the shared source
 * file node yields nodes of the expected classes.
 */
public function testSourceFileNodePosition() {
    $root = self::$sourceFileNode;

    $nodeAt15 = $root->getDescendantNodeAtPosition(15);
    $this->assertInstanceOf(FunctionDeclaration::class, $nodeAt15);

    $nodeAt28 = $root->getDescendantNodeAtPosition(28);
    $this->assertInstanceOf(\Microsoft\PhpParser\Node\Expression\Variable::class, $nodeAt28);
}
|
||||||
|
|
||||||
|
@ -135,4 +129,65 @@ PHP;
|
||||||
"getFirstAncestor with no specified class names should return null."
|
"getFirstAncestor with no specified class names should return null."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Exercises getDocCommentText() against a table of inputs.
 *
 * Each row is [node class to look up, source text, expected doc comment
 * text]; a null expectation means no doc comment should be reported.
 */
public function testGetDocCommentText() {
    $cases = [
        // A well-formed doc comment directly before the function.
        [FunctionDeclaration::class, "<?php /** */ function b () { }", "/** */"],
        // Comment forms that are expected NOT to be reported as doc comments.
        [FunctionDeclaration::class, "<?php /***/ function b () { }", null],
        [FunctionDeclaration::class, "<?php /*/** */ function b () { }", null],
        [FunctionDeclaration::class, "<?php /**d */ function b () { }", null],
        // With several doc comments, the one nearest the node is expected.
        [FunctionDeclaration::class, "<?php /** hello */\n/** */ function b () { }", "/** */"],
        [FunctionDeclaration::class, "<?php /** hello */\n/**\n*/ function b () { }", "/**\n*/"],
        // No comment at all.
        [FunctionDeclaration::class, "<?php function b () { }", null],
        // Doc-comment-looking text before the opening tag, looked up on the InlineHtml node.
        [\Microsoft\PhpParser\Node\Statement\InlineHtml::class, "/** hello */ <?php function b () { }", null],
    ];

    foreach ($cases as $case) {
        list($nodeKind, $contents, $expectedDocCommentText) = $case;
        $this->AssertDocCommentTextOfNode($nodeKind, $contents, $expectedDocCommentText);
    }
}
|
||||||
|
|
||||||
|
/**
 * Parses $contents, looks up the first descendant of class $nodeKind and
 * asserts that its doc comment text is exactly $expectedDocCommentText.
 *
 * @param string $nodeKind class name of the node to look up
 * @param string $contents source text to parse
 * @param string|null $expectedDocCommentText expected result of getDocCommentText();
 *                                            null means "no doc comment"
 * @return array [$contents, $parser, $ast, $node] for callers that want to inspect further
 */
private function AssertDocCommentTextOfNode($nodeKind, $contents, $expectedDocCommentText) : array {
    $parser = new Parser();
    $ast = $parser->parseSourceFile($contents);
    $node = $ast->getFirstDescendantNode($nodeKind);

    // getFirstDescendantNode() returns null when nothing matches; fail with a
    // readable assertion instead of a fatal "member function on null" error.
    $this->assertNotNull($node, "Expected the parsed source to contain a node of type $nodeKind.");

    // assertSame rather than assertEquals: with loose comparison null and ""
    // are considered equal, which would hide a wrong result for the
    // "no doc comment" cases.
    $this->assertSame($expectedDocCommentText, $node->getDocCommentText());

    return array($contents, $parser, $ast, $node);
}
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче