#88 Add API for `getDocCommentText`, `getFirst*Node`

- reuse PhpTokenizer rather than constructing a separate regex, because we'll need this sort of context-specific re-tokenizing functionality for incremental parsing anyway.
- functional, but still a work-in-progress, just like the rest of the API :)

Also moved the hand-spun lexer to the experiments folder.
This commit is contained in:
Sara Itani 2017-02-07 15:44:21 -08:00
Родитель 143c44a4d6
Коммит 7a58ceaeb8
7 изменённых файлов: 184 добавлений и 39 удалений

Просмотреть файл

Просмотреть файл

@ -77,6 +77,42 @@ class Node implements \JsonSerializable {
return null; return null;
} }
/**
 * Gets the first child node that is an instance of any of the given classes.
 *
 * Children are visited in the order produced by getChildNodes(); for each
 * child the class names are tried in the order they were passed.
 *
 * @param string ...$classNames Class names to match against (e.g. `Foo::class`).
 * @return Node|null The first matching child, or null when nothing matches
 *                   (including when no class names are given).
 */
public function getFirstChildNode(...$classNames) {
    foreach ($this->getChildNodes() as $candidate) {
        foreach ($classNames as $expectedClass) {
            if (!($candidate instanceof $expectedClass)) {
                continue;
            }
            return $candidate;
        }
    }

    return null;
}
/**
 * Gets the first descendant node that is an instance of any of the given classes.
 *
 * Descendants are visited in the order produced by getDescendantNodes().
 *
 * @param string ...$classNames Class names to match against (e.g. `Foo::class`).
 * @return Node|null The first matching descendant, or null when nothing matches
 *                   (including when no class names are given).
 */
public function getFirstDescendantNode(...$classNames) {
    // True when $node is an instance of at least one requested class.
    $matchesAnyClass = static function ($node) use ($classNames) {
        foreach ($classNames as $expectedClass) {
            if ($node instanceof $expectedClass) {
                return true;
            }
        }
        return false;
    };

    foreach ($this->getDescendantNodes() as $candidate) {
        if ($matchesAnyClass($candidate)) {
            return $candidate;
        }
    }

    return null;
}
/** /**
* Gets root of the syntax tree (returns self if has no parents) * Gets root of the syntax tree (returns self if has no parents)
* @return Node * @return Node
@ -349,6 +385,7 @@ class Node implements \JsonSerializable {
} }
public function & getFileContents() : string { public function & getFileContents() : string {
// TODO consider renaming to getSourceText
return $this->getRoot()->fileContents; return $this->getRoot()->fileContents;
} }
@ -369,6 +406,20 @@ class Node implements \JsonSerializable {
return null; return null;
} }
/**
 * Gets the text of the doc comment that most closely precedes this node.
 *
 * The node's leading comment/whitespace text is re-tokenized through
 * PhpTokenizer with comments emitted as tokens rather than skipped as trivia,
 * using the node's full start as the initial position. The resulting tokens
 * are then searched from last to first for a doc comment token.
 *
 * @return string|null Text of the last doc comment token found in the leading
 *                     trivia, or null when there is none.
 */
public function getDocCommentText() {
    $triviaTokens = PhpTokenizer::getTokensArrayFromContent(
        $this->getLeadingCommentAndWhitespaceText(),
        ParseContext::SourceElements,
        $this->getFullStart(),
        false
    );

    // Walk backwards so that the doc comment nearest to the node wins.
    foreach (\array_reverse($triviaTokens) as $triviaToken) {
        if ($triviaToken->kind !== TokenKind::DocCommentToken) {
            continue;
        }
        return $triviaToken->getText($this->getFileContents());
    }

    return null;
}
public function __toString() { public function __toString() {
return $this->getText(); return $this->getText();
} }

23
src/ParseContext.php Normal file
Просмотреть файл

@ -0,0 +1,23 @@
<?php
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
namespace Microsoft\PhpParser;
/**
 * Integer identifiers for parse contexts.
 *
 * The values are consecutive integers starting at 0, and `Count` is the number
 * of contexts declared here. A context value can be handed to
 * `PhpTokenizer::getTokensArrayFromContent()` as its `$parseContext` argument;
 * `Node::getDocCommentText()` passes `SourceElements` that way.
 *
 * NOTE(review): each constant presumably names the kind of element list being
 * parsed (statements in a block, members of a class, ...) — confirm against
 * the parser before relying on that reading.
 */
class ParseContext {
const SourceElements = 0;
const BlockStatements = 1;
const ClassMembers = 2;
const IfClause2Elements = 3;
const SwitchStatementElements = 4;
const CaseStatementElements = 5;
const WhileStatementElements = 6;
const ForStatementElements = 7;
const ForeachStatementElements = 8;
const DeclareStatementElements = 9;
const InterfaceMembers = 10;
const TraitMembers = 11;
// Number of contexts declared above (0 through 11); keep in sync when adding one.
const Count = 12;
}

Просмотреть файл

@ -2927,20 +2927,4 @@ class Associativity {
const None = 0; const None = 0;
const Left = 1; const Left = 1;
const Right = 2; const Right = 2;
} }
/**
 * Integer identifiers for parse contexts.
 *
 * The values are consecutive integers starting at 0, and `Count` is the number
 * of contexts declared here.
 *
 * NOTE(review): each constant presumably names the kind of element list being
 * parsed (statements in a block, members of a class, ...) — confirm against
 * the parser before relying on that reading.
 */
class ParseContext {
const SourceElements = 0;
const BlockStatements = 1;
const ClassMembers = 2;
const IfClause2Elements = 3;
const SwitchStatementElements = 4;
const CaseStatementElements = 5;
const WhileStatementElements = 6;
const ForStatementElements = 7;
const ForeachStatementElements = 8;
const DeclareStatementElements = 9;
const InterfaceMembers = 10;
const TraitMembers = 11;
// Number of contexts declared above (0 through 11); keep in sync when adding one.
const Count = 12;
}

Просмотреть файл

@ -6,18 +6,24 @@
namespace Microsoft\PhpParser; namespace Microsoft\PhpParser;
/**
* Tokenizes content using PHP's built-in `token_get_all`, and converts it to a "lightweight" Token representation.
*
* Initially we tried hand-spinning the lexer (see `experiments/Lexer.php`), but we had difficulties optimizing
* performance (especially when working with Unicode characters.)
*
* Class PhpTokenizer
* @package Microsoft\PhpParser
*/
class PhpTokenizer implements ITokenStreamProvider { class PhpTokenizer implements ITokenStreamProvider {
public $pos; public $pos;
public $endOfFilePos; public $endOfFilePos;
private $token;
public $inScriptSection = false;
private $tokensArray; private $tokensArray;
public function __construct($content) { public function __construct($content) {
$tokens = \token_get_all($content); $this->tokensArray = $this->getTokensArrayFromContent($content);
$this->initialize($tokens); $this->endOfFilePos = \count($this->tokensArray) - 1;
$this->pos = 0; $this->pos = 0;
} }
@ -43,11 +49,19 @@ class PhpTokenizer implements ITokenStreamProvider {
return $this->tokensArray; return $this->tokensArray;
} }
private function initialize($tokens) { public static function getTokensArrayFromContent(
$content, $parseContext = null, $initialPos = 0, $treatCommentsAsTrivia = true
) : array {
if ($parseContext !== null) {
$prefix = self::PARSE_CONTEXT_TO_PREFIX[$parseContext];
$content = $prefix . $content;
$passedPrefix = false;
}
$tokens = \token_get_all($content);
$arr = array(); $arr = array();
$fullStart = 0; $fullStart = $start = $pos = $initialPos;
$start = 0;
$pos = 0;
foreach ($tokens as $token) { foreach ($tokens as $token) {
if (\is_array($token)) { if (\is_array($token)) {
@ -60,6 +74,14 @@ class PhpTokenizer implements ITokenStreamProvider {
$pos += $strlen; $pos += $strlen;
if ($parseContext !== null && !$passedPrefix) {
$passedPrefix = \count($prefix) < $pos;
if ($passedPrefix) {
$fullStart = $start = $pos = $initialPos;
}
continue;
}
switch ($tokenKind) { switch ($tokenKind) {
case T_OPEN_TAG: case T_OPEN_TAG:
$arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart); $arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
@ -67,8 +89,6 @@ class PhpTokenizer implements ITokenStreamProvider {
continue; continue;
case T_WHITESPACE: case T_WHITESPACE:
case T_COMMENT:
case T_DOC_COMMENT:
$start += $strlen; $start += $strlen;
continue; continue;
@ -82,6 +102,11 @@ class PhpTokenizer implements ITokenStreamProvider {
} }
default: default:
if (($tokenKind === T_COMMENT || $tokenKind === T_DOC_COMMENT) && $treatCommentsAsTrivia) {
$start += $strlen;
continue;
}
$newTokenKind = isset(self::TOKEN_MAP[$tokenKind]) $newTokenKind = isset(self::TOKEN_MAP[$tokenKind])
? self::TOKEN_MAP[$tokenKind] ? self::TOKEN_MAP[$tokenKind]
: $newTokenKind = TokenKind::Unknown; : $newTokenKind = TokenKind::Unknown;
@ -92,8 +117,7 @@ class PhpTokenizer implements ITokenStreamProvider {
} }
$arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart); $arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
$this->tokensArray = $arr; return $arr;
$this->endOfFilePos = \count($arr) - 1;
} }
const TOKEN_MAP = [ const TOKEN_MAP = [
@ -267,6 +291,12 @@ class PhpTokenizer implements ITokenStreamProvider {
T_UNSET_CAST => TokenKind::UnsetCastToken, T_UNSET_CAST => TokenKind::UnsetCastToken,
T_START_HEREDOC => TokenKind::HeredocStart, T_START_HEREDOC => TokenKind::HeredocStart,
T_END_HEREDOC => TokenKind::HeredocEnd, T_END_HEREDOC => TokenKind::HeredocEnd,
T_STRING_VARNAME => TokenKind::VariableName T_STRING_VARNAME => TokenKind::VariableName,
T_COMMENT => TokenKind::CommentToken,
T_DOC_COMMENT => TokenKind::DocCommentToken
];
const PARSE_CONTEXT_TO_PREFIX = [
ParseContext::SourceElements => "<?php "
]; ];
} }

Просмотреть файл

@ -212,6 +212,8 @@ class TokenKind {
const BoolCastToken = 414; const BoolCastToken = 414;
const ArrayCastToken = 415; const ArrayCastToken = 415;
const IntegerLiteralToken = 416; const IntegerLiteralToken = 416;
const CommentToken = 417;
const DocCommentToken = 418;
// TODO type annotations - PHP7 // TODO type annotations - PHP7
} }

Просмотреть файл

@ -4,18 +4,12 @@
* Licensed under the MIT License. See License.txt in the project root for license information. * Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/ *--------------------------------------------------------------------------------------------*/
// TODO autoload classes
require_once(__DIR__ . "/../src/TokenStreamProviderFactory.php");
require_once(__DIR__ . "/../src/Parser.php");
require_once(__DIR__ . "/../src/Token.php");
use Microsoft\PhpParser\Node;
use Microsoft\PhpParser\Node\SourceFileNode; use Microsoft\PhpParser\Node\SourceFileNode;
use Microsoft\PhpParser\Node\Statement\FunctionDeclaration;
use Microsoft\PhpParser\Node\Statement\IfStatementNode; use Microsoft\PhpParser\Node\Statement\IfStatementNode;
use Microsoft\PhpParser\Node\Statement\NamespaceDefinition; use Microsoft\PhpParser\Node\Statement\NamespaceDefinition;
use Microsoft\PhpParser\Parser; use Microsoft\PhpParser\Parser;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
use Microsoft\PhpParser\TokenKind;
class NodeApiTest extends TestCase { class NodeApiTest extends TestCase {
const FILENAME_PATTERN = __dir__ . "/cases/{parser,}/*.php"; const FILENAME_PATTERN = __dir__ . "/cases/{parser,}/*.php";
@ -38,7 +32,7 @@ PHP;
public function testSourceFileNodePosition() { public function testSourceFileNodePosition() {
$node = self::$sourceFileNode; $node = self::$sourceFileNode;
$this->assertInstanceOf(\Microsoft\PhpParser\Node\Statement\FunctionDeclaration::class, $node->getDescendantNodeAtPosition(15)); $this->assertInstanceOf(FunctionDeclaration::class, $node->getDescendantNodeAtPosition(15));
$this->assertInstanceOf(\Microsoft\PhpParser\Node\Expression\Variable::class, $node->getDescendantNodeAtPosition(28)); $this->assertInstanceOf(\Microsoft\PhpParser\Node\Expression\Variable::class, $node->getDescendantNodeAtPosition(28));
} }
@ -135,4 +129,65 @@ PHP;
"getFirstAncestor with no specified class names should return null." "getFirstAncestor with no specified class names should return null."
); );
} }
/**
 * Checks getDocCommentText() against a table of sources.
 *
 * Each row is [node class to locate, source text, expected doc comment text];
 * an expected value of null means no doc comment text should be returned.
 * Rows are checked in order and the first failing row stops the test.
 */
public function testGetDocCommentText() {
    $cases = [
        // A lone doc comment directly before the function.
        [FunctionDeclaration::class, "<?php /** */ function b () { }", "/** */"],
        // Comment shapes for which no doc comment text is expected.
        [FunctionDeclaration::class, "<?php /***/ function b () { }", null],
        [FunctionDeclaration::class, "<?php /*/** */ function b () { }", null],
        [FunctionDeclaration::class, "<?php /**d */ function b () { }", null],
        // With two doc comments, the one nearest to the function is returned.
        [FunctionDeclaration::class, "<?php /** hello */\n/** */ function b () { }", "/** */"],
        [FunctionDeclaration::class, "<?php /** hello */\n/**\n*/ function b () { }", "/**\n*/"],
        // No comment at all.
        [FunctionDeclaration::class, "<?php function b () { }", null],
        // Text ahead of the opening tag, checked on the InlineHtml node.
        [\Microsoft\PhpParser\Node\Statement\InlineHtml::class, "/** hello */ <?php function b () { }", null],
    ];

    foreach ($cases as list($nodeKind, $contents, $expectedDocCommentText)) {
        $this->AssertDocCommentTextOfNode($nodeKind, $contents, $expectedDocCommentText);
    }
}
/**
 * Parses $contents, locates the first descendant of the requested node class,
 * and asserts that its doc comment text is exactly the expected value.
 *
 * @param string      $nodeKind               Class name of the node to locate (e.g. `FunctionDeclaration::class`).
 * @param string      $contents               Source text to parse.
 * @param string|null $expectedDocCommentText Expected result of getDocCommentText(); null for "no doc comment".
 * @return array [$contents, $parser, $ast, $node], in that order.
 */
private function AssertDocCommentTextOfNode($nodeKind, $contents, $expectedDocCommentText) : array {
    $parser = new Parser();
    $ast = $parser->parseSourceFile($contents);
    $node = $ast->getFirstDescendantNode($nodeKind);

    // Report a missing node as a test failure rather than a fatal
    // "call to a member function on null" on the next line.
    $this->assertNotNull($node, "No node of type $nodeKind found in: $contents");

    // assertSame, not assertEquals: loose comparison would let an empty
    // string satisfy an expected null (and vice versa).
    $this->assertSame(
        $expectedDocCommentText,
        $node->getDocCommentText()
    );

    return array($contents, $parser, $ast, $node);
}
} }