#88 Add API for `getDocCommentText`, `getFirst*Node`
- reuse PhpTokenizer rather than constructing a separate regex, because we'll need this sort of context-specific re-tokenizing functionality for incremental parsing anyway. - functional, but still a work in progress, just like the rest of the API :) Also moved the hand-spun lexer to the experiments folder
This commit is contained in:
Родитель
143c44a4d6
Коммит
7a58ceaeb8
51
src/Node.php
51
src/Node.php
|
@ -77,6 +77,42 @@ class Node implements \JsonSerializable {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Gets the first child node that is an instance of any of the provided classes.
 *
 * Children are visited in the order produced by getChildNodes(); for each child the
 * class names are tried in the order given. Returns null when nothing matches,
 * including when no class names are passed.
 *
 * @param string ...$classNames class names to test each child against
 * @return Node|null
 */
public function getFirstChildNode(...$classNames) {
    foreach ($this->getChildNodes() as $candidate) {
        foreach ($classNames as $wantedClass) {
            if (!($candidate instanceof $wantedClass)) {
                continue;
            }

            // First hit wins; stop scanning immediately.
            return $candidate;
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Gets the first descendant node that is an instance of any of the provided classes.
 *
 * Descendants are visited in the order produced by getDescendantNodes(); for each one
 * the class names are tried in the order given. Returns null when nothing matches,
 * including when no class names are passed.
 *
 * @param string ...$classNames class names to test each descendant against
 * @return Node|null
 */
public function getFirstDescendantNode(...$classNames) {
    foreach ($this->getDescendantNodes() as $candidate) {
        foreach ($classNames as $wantedClass) {
            if (!($candidate instanceof $wantedClass)) {
                continue;
            }

            // First hit wins; stop scanning immediately.
            return $candidate;
        }
    }

    return null;
}
|
||||
|
||||
/**
|
||||
* Gets root of the syntax tree (returns self if has no parents)
|
||||
* @return Node
|
||||
|
@ -349,6 +385,7 @@ class Node implements \JsonSerializable {
|
|||
}
|
||||
|
||||
/**
 * Returns, by reference, the `fileContents` property of the root node of this tree
 * (the root is obtained through getRoot()).
 *
 * @return string
 */
public function & getFileContents() : string {
    // TODO consider renaming to getSourceText
    $root = $this->getRoot();
    return $root->fileContents;
}
|
||||
|
||||
|
@ -369,6 +406,20 @@ class Node implements \JsonSerializable {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Gets the text of the doc comment closest to this node, or null if there is none.
 *
 * The node's leading comment-and-whitespace text is re-tokenized with PhpTokenizer in
 * the SourceElements context, starting at this node's full start and with
 * $treatCommentsAsTrivia set to false, so that comments come back as tokens. The
 * resulting tokens are scanned from last to first and the first DocCommentToken found
 * (i.e. the last one in source order) supplies the returned text.
 *
 * @return string|null
 */
public function getDocCommentText() {
    $triviaTokens = PhpTokenizer::getTokensArrayFromContent(
        $this->getLeadingCommentAndWhitespaceText(),
        ParseContext::SourceElements,
        $this->getFullStart(),
        false
    );

    // Walk backwards so the doc comment nearest to the node is preferred.
    foreach (\array_reverse($triviaTokens) as $candidate) {
        if ($candidate->kind !== TokenKind::DocCommentToken) {
            continue;
        }
        return $candidate->getText($this->getFileContents());
    }

    return null;
}
|
||||
|
||||
/**
 * String conversion of a node: yields whatever getText() returns.
 *
 * @return string
 */
public function __toString() {
    $text = $this->getText();
    return $text;
}
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
namespace Microsoft\PhpParser;
|
||||
|
||||
/**
 * Integer identifiers for the contexts in which source text can be parsed.
 *
 * PhpTokenizer::getTokensArrayFromContent() accepts one of these values as its
 * $parseContext argument and uses it as a key into PARSE_CONTEXT_TO_PREFIX.
 */
class ParseContext {
    // Top-level and block-level element lists.
    const SourceElements = 0;
    const BlockStatements = 1;
    const ClassMembers = 2;

    // Element lists belonging to individual statement kinds.
    const IfClause2Elements = 3;
    const SwitchStatementElements = 4;
    const CaseStatementElements = 5;
    const WhileStatementElements = 6;
    const ForStatementElements = 7;
    const ForeachStatementElements = 8;
    const DeclareStatementElements = 9;

    // Member lists of the remaining class-like declarations.
    const InterfaceMembers = 10;
    const TraitMembers = 11;

    // Number of contexts declared above (values 0 through 11).
    const Count = 12;
}
|
|
@ -2927,20 +2927,4 @@ class Associativity {
|
|||
const None = 0;
|
||||
const Left = 1;
|
||||
const Right = 2;
|
||||
}
|
||||
|
||||
/**
 * Integer identifiers for the contexts in which source text can be parsed.
 */
class ParseContext {
    // Top-level and block-level element lists.
    const SourceElements = 0;
    const BlockStatements = 1;
    const ClassMembers = 2;

    // Element lists belonging to individual statement kinds.
    const IfClause2Elements = 3;
    const SwitchStatementElements = 4;
    const CaseStatementElements = 5;
    const WhileStatementElements = 6;
    const ForStatementElements = 7;
    const ForeachStatementElements = 8;
    const DeclareStatementElements = 9;

    // Member lists of the remaining class-like declarations.
    const InterfaceMembers = 10;
    const TraitMembers = 11;

    // Number of contexts declared above (values 0 through 11).
    const Count = 12;
}
|
||||
}
|
|
@ -6,18 +6,24 @@
|
|||
|
||||
namespace Microsoft\PhpParser;
|
||||
|
||||
/**
|
||||
* Tokenizes content using PHP's built-in `token_get_all`, and converts the result to a "lightweight" Token representation.
|
||||
*
|
||||
* Initially we tried hand-spinning the lexer (see `experiments/Lexer.php`), but we had difficulties optimizing
|
||||
* performance (especially when working with Unicode characters.)
|
||||
*
|
||||
* Class PhpTokenizer
|
||||
* @package Microsoft\PhpParser
|
||||
*/
|
||||
class PhpTokenizer implements ITokenStreamProvider {
|
||||
public $pos;
|
||||
public $endOfFilePos;
|
||||
private $token;
|
||||
|
||||
public $inScriptSection = false;
|
||||
|
||||
private $tokensArray;
|
||||
|
||||
/**
 * Tokenizes $content once and positions the stream on the first token.
 *
 * The token list comes from getTokensArrayFromContent(); the end-of-file position is
 * the index of the last token in that list.
 *
 * @param string $content source text to tokenize
 */
public function __construct($content) {
    // Tokenize exactly once. getTokensArrayFromContent() is static, so call it through
    // self:: rather than through the instance.
    $this->tokensArray = self::getTokensArrayFromContent($content);
    $this->endOfFilePos = \count($this->tokensArray) - 1;
    $this->pos = 0;
}
|
||||
|
||||
|
@ -43,11 +49,19 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
return $this->tokensArray;
|
||||
}
|
||||
|
||||
private function initialize($tokens) {
|
||||
public static function getTokensArrayFromContent(
|
||||
$content, $parseContext = null, $initialPos = 0, $treatCommentsAsTrivia = true
|
||||
) : array {
|
||||
if ($parseContext !== null) {
|
||||
$prefix = self::PARSE_CONTEXT_TO_PREFIX[$parseContext];
|
||||
$content = $prefix . $content;
|
||||
$passedPrefix = false;
|
||||
}
|
||||
|
||||
$tokens = \token_get_all($content);
|
||||
|
||||
$arr = array();
|
||||
$fullStart = 0;
|
||||
$start = 0;
|
||||
$pos = 0;
|
||||
$fullStart = $start = $pos = $initialPos;
|
||||
|
||||
foreach ($tokens as $token) {
|
||||
if (\is_array($token)) {
|
||||
|
@ -60,6 +74,14 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
|
||||
$pos += $strlen;
|
||||
|
||||
if ($parseContext !== null && !$passedPrefix) {
|
||||
$passedPrefix = \count($prefix) < $pos;
|
||||
if ($passedPrefix) {
|
||||
$fullStart = $start = $pos = $initialPos;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
switch ($tokenKind) {
|
||||
case T_OPEN_TAG:
|
||||
$arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
|
||||
|
@ -67,8 +89,6 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
continue;
|
||||
|
||||
case T_WHITESPACE:
|
||||
case T_COMMENT:
|
||||
case T_DOC_COMMENT:
|
||||
$start += $strlen;
|
||||
continue;
|
||||
|
||||
|
@ -82,6 +102,11 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
}
|
||||
|
||||
default:
|
||||
if (($tokenKind === T_COMMENT || $tokenKind === T_DOC_COMMENT) && $treatCommentsAsTrivia) {
|
||||
$start += $strlen;
|
||||
continue;
|
||||
}
|
||||
|
||||
$newTokenKind = isset(self::TOKEN_MAP[$tokenKind])
|
||||
? self::TOKEN_MAP[$tokenKind]
|
||||
: $newTokenKind = TokenKind::Unknown;
|
||||
|
@ -92,8 +117,7 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
}
|
||||
|
||||
$arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
|
||||
$this->tokensArray = $arr;
|
||||
$this->endOfFilePos = \count($arr) - 1;
|
||||
return $arr;
|
||||
}
|
||||
|
||||
const TOKEN_MAP = [
|
||||
|
@ -267,6 +291,12 @@ class PhpTokenizer implements ITokenStreamProvider {
|
|||
T_UNSET_CAST => TokenKind::UnsetCastToken,
|
||||
T_START_HEREDOC => TokenKind::HeredocStart,
|
||||
T_END_HEREDOC => TokenKind::HeredocEnd,
|
||||
T_STRING_VARNAME => TokenKind::VariableName
|
||||
T_STRING_VARNAME => TokenKind::VariableName,
|
||||
T_COMMENT => TokenKind::CommentToken,
|
||||
T_DOC_COMMENT => TokenKind::DocCommentToken
|
||||
];
|
||||
|
||||
/**
 * Maps a ParseContext value to the text that getTokensArrayFromContent() prepends to
 * the content before tokenizing it when a parse context is supplied.
 * Only ParseContext::SourceElements has an entry.
 */
const PARSE_CONTEXT_TO_PREFIX = [
    ParseContext::SourceElements => "<?php ",
];
|
||||
}
|
||||
|
|
|
@ -212,6 +212,8 @@ class TokenKind {
|
|||
const BoolCastToken = 414;
|
||||
const ArrayCastToken = 415;
|
||||
const IntegerLiteralToken = 416;
|
||||
const CommentToken = 417;
|
||||
const DocCommentToken = 418;
|
||||
|
||||
// TODO type annotations - PHP7
|
||||
}
|
||||
|
|
|
@ -4,18 +4,12 @@
|
|||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
// TODO autoload classes
|
||||
require_once(__DIR__ . "/../src/TokenStreamProviderFactory.php");
|
||||
require_once(__DIR__ . "/../src/Parser.php");
|
||||
require_once(__DIR__ . "/../src/Token.php");
|
||||
|
||||
use Microsoft\PhpParser\Node;
|
||||
use Microsoft\PhpParser\Node\SourceFileNode;
|
||||
use Microsoft\PhpParser\Node\Statement\FunctionDeclaration;
|
||||
use Microsoft\PhpParser\Node\Statement\IfStatementNode;
|
||||
use Microsoft\PhpParser\Node\Statement\NamespaceDefinition;
|
||||
use Microsoft\PhpParser\Parser;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Microsoft\PhpParser\TokenKind;
|
||||
|
||||
class NodeApiTest extends TestCase {
|
||||
const FILENAME_PATTERN = __dir__ . "/cases/{parser,}/*.php";
|
||||
|
@ -38,7 +32,7 @@ PHP;
|
|||
|
||||
/**
 * Checks that getDescendantNodeAtPosition() on the shared source file node resolves
 * position 15 to a FunctionDeclaration and position 28 to a Variable.
 */
public function testSourceFileNodePosition() {
    $node = self::$sourceFileNode;

    // A single assertion per position; FunctionDeclaration is referenced through its import.
    $this->assertInstanceOf(FunctionDeclaration::class, $node->getDescendantNodeAtPosition(15));
    $this->assertInstanceOf(\Microsoft\PhpParser\Node\Expression\Variable::class, $node->getDescendantNodeAtPosition(28));
}
|
||||
|
||||
|
@ -135,4 +129,65 @@ PHP;
|
|||
"getFirstAncestor with no specified class names should return null."
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Runs getDocCommentText() against a table of small source snippets.
 *
 * Covered expectations: a well-formed doc comment is returned verbatim; comments that
 * only resemble a doc comment yield null; with two doc comments the later one is
 * returned (including one spanning a newline); no comment yields null; and text before
 * the open tag yields null for the InlineHtml node.
 */
public function testGetDocCommentText() {
    // Each row: [node class to look up, source text, expected doc comment text or null].
    $cases = [
        [FunctionDeclaration::class, "<?php /** */ function b () { }", "/** */"],
        [FunctionDeclaration::class, "<?php /***/ function b () { }", null],
        [FunctionDeclaration::class, "<?php /*/** */ function b () { }", null],
        [FunctionDeclaration::class, "<?php /**d */ function b () { }", null],
        [FunctionDeclaration::class, "<?php /** hello */\n/** */ function b () { }", "/** */"],
        [FunctionDeclaration::class, "<?php /** hello */\n/**\n*/ function b () { }", "/**\n*/"],
        [FunctionDeclaration::class, "<?php function b () { }", null],
        [\Microsoft\PhpParser\Node\Statement\InlineHtml::class, "/** hello */ <?php function b () { }", null],
    ];

    foreach ($cases as list($nodeKind, $contents, $expectedText)) {
        $this->AssertDocCommentTextOfNode($nodeKind, $contents, $expectedText);
    }
}
|
||||
|
||||
/**
 * Parses $contents, looks up the first descendant node of class $nodeKind and asserts
 * that its getDocCommentText() result is identical to $expectedDocCommentText.
 *
 * @param string $nodeKind class name of the node to look up
 * @param string $contents source text to parse
 * @param string|null $expectedDocCommentText expected doc comment text, or null for none
 * @return array the contents, parser, AST and located node, in that order
 */
private function AssertDocCommentTextOfNode($nodeKind, $contents, $expectedDocCommentText) : array {
    $parser = new Parser();
    $ast = $parser->parseSourceFile($contents);
    $node = $ast->getFirstDescendantNode($nodeKind);

    // Fail with a readable message instead of a fatal "call on null" error when the
    // snippet does not contain the requested node kind.
    $this->assertNotNull($node, "No descendant node of type $nodeKind was found in the parsed source.");

    // Strict comparison: a loose assertEquals would treat null, "" and false as equal,
    // hiding the difference between "no doc comment" and an empty or failed result.
    $this->assertSame($expectedDocCommentText, $node->getDocCommentText());

    return [$contents, $parser, $ast, $node];
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче