Merge pull request #212 from TysonAndre/add-FilePositionMap

Efficiently look up lines/columns of many nodes
This commit is contained in:
Rob Lourens 2018-02-14 10:38:07 -08:00 коммит произвёл GitHub
Родитель cdac5b2405 d3deb113ea
Коммит 8bbc9a7f81
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 191 добавлений и 0 удалений

137
src/FilePositionMap.php Normal file
Просмотреть файл

@ -0,0 +1,137 @@
<?php
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
namespace Microsoft\PhpParser;
/**
* FilePositionMap can be used to get the line number for a large number of nodes (starting from 1).
* It works the most efficiently when the requested node is close to the previously requested node.
*
* Other designs that weren't chosen:
* - Precomputing all of the start/end offsets when initializing was slower - Some offsets weren't needed, and walking the tree was slower.
* - Caching line numbers for previously requested offsets wasn't really necessary, since offsets are usually close together and weren't requested repeatedly.
*/
class FilePositionMap {
/** @var string the full file contents */
private $fileContents;
/** @var int - Precomputed strlen($file_contents) */
private $fileContentsLength;
/** @var int the 0-based byte offset of the most recent request for a line number. */
private $currentOffset;
/** @var int the line number for $this->currentOffset (updated whenever currentOffset is updated) */
private $lineForCurrentOffset;
public function __construct(string $file_contents) {
$this->fileContents = $file_contents;
$this->fileContentsLength = \strlen($file_contents);
$this->currentOffset = 0;
$this->lineForCurrentOffset = 1;
}
/**
* @param Node $node the node to get the start line for.
* TODO deprecate and merge this and getTokenStartLine into getStartLine
* if https://github.com/Microsoft/tolerant-php-parser/issues/166 is fixed,
* (i.e. if there is a consistent way to get the start offset)
*/
public function getNodeStartLine(Node $node) : int {
return $this->getLineNumberForOffset($node->getStart());
}
/**
* @param Node $node the node to get the start line for.
*/
public function getTokenStartLine(Token $token) : int {
return $this->getLineNumberForOffset($token->start);
}
/**
* @param Node|Token $node
*/
public function getStartLine($node) : int {
if ($node instanceof Token) {
$offset = $node->start;
} else {
$offset = $node->getStart();
}
return $this->getLineNumberForOffset($offset);
}
/**
* @param Node|Token $node
* Similar to getStartLine but includes the column
*/
public function getStartLineCharacterPositionForOffset($node) : LineCharacterPosition {
if ($node instanceof Token) {
$offset = $node->start;
} else {
$offset = $node->getStart();
}
return $this->getLineCharacterPositionForOffset($offset);
}
/** @param Node|Token $node */
public function getEndLine($node) : int {
return $this->getLineNumberForOffset($node->getEndPosition());
}
/**
* @param Node|Token $node
* Similar to getStartLine but includes the column
*/
public function getEndLineCharacterPositionForOffset($node) : LineCharacterPosition {
return $this->getLineCharacterPositionForOffset($node);
}
/**
* @param Node|Token $node
* Similar to getStartLine but includes the column
*/
public function getLineCharacterPositionForOffset(int $offset) : LineCharacterPosition {
$line = $this->getLineNumberForOffset($offset);
$character = $this->getColumnForOffset($offset);
return new LineCharacterPosition($line, $character);
}
public function getLineNumberForOffset(int $offset) : int {
if ($offset < 0) {
$offset = 0;
} elseif ($offset > $this->fileContentsLength) {
$offset = $this->fileContentsLength;
}
$currentOffset = $this->currentOffset;
if ($offset > $currentOffset) {
$this->lineForCurrentOffset += \substr_count($this->fileContents, "\n", $currentOffset, $offset - $currentOffset);
$this->currentOffset = $offset;
} elseif ($offset < $currentOffset) {
$this->lineForCurrentOffset -= \substr_count($this->fileContents, "\n", $offset, $currentOffset - $offset);
$this->currentOffset = $offset;
}
return $this->lineForCurrentOffset;
}
/**
* @return int - gets the 1-based column offset
*/
public function getColumnForOffset(int $offset) : int {
$length = $this->fileContentsLength;
if ($offset <= 1) {
return 1;
} elseif ($offset > $length) {
$offset = $length;
}
// Postcondition: offset >= 1, ($lastNewlinePos < $offset)
// If there was no previous newline, lastNewlinePos = 0
// Start strrpos check from the character before the current character,
// in case the current character is a newline.
$lastNewlinePos = \strrpos($this->fileContents, "\n", -$length + $offset - 1);
return 1 + $offset - ($lastNewlinePos === false ? 0 : $lastNewlinePos + 1);
}
}

Просмотреть файл

@ -0,0 +1,54 @@
<?php
use Microsoft\PhpParser\FilePositionMap;
use Microsoft\PhpParser\LineCharacterPosition;
use Microsoft\PhpParser\Node;
use Microsoft\PhpParser\Token;
use PHPUnit\Framework\TestCase;
class FilePositionMapTest extends TestCase {
public function expectLineCharacterPositionEquals(int $line, int $character, LineCharacterPosition $position) {
$this->assertSame($line, $position->line, "Expected same line");
$this->assertSame($character, $position->character, "Expected same character");
}
/**
* The map keeps the current offset and line -
* Move the requests forward, backwards, and to the same position as the previous request to verify that this is done properly.
*/
public function testLineCharacterPosition() {
$map = new FilePositionMap("foo\n\nbar\n");
$this->expectLineCharacterPositionEquals(1, 1, $map->getLineCharacterPositionForOffset(0));
$this->expectLineCharacterPositionEquals(1, 1, $map->getLineCharacterPositionForOffset(-1));
$this->expectLineCharacterPositionEquals(1, 3, $map->getLineCharacterPositionForOffset(2));
$this->expectLineCharacterPositionEquals(1, 4, $map->getLineCharacterPositionForOffset(3));
$this->expectLineCharacterPositionEquals(2, 1, $map->getLineCharacterPositionForOffset(4));
$this->expectLineCharacterPositionEquals(1, 4, $map->getLineCharacterPositionForOffset(3));
$this->expectLineCharacterPositionEquals(3, 1, $map->getLineCharacterPositionForOffset(5));
$this->expectLineCharacterPositionEquals(3, 4, $map->getLineCharacterPositionForOffset(8));
$this->expectLineCharacterPositionEquals(3, 4, $map->getLineCharacterPositionForOffset(8));
$this->expectLineCharacterPositionEquals(4, 1, $map->getLineCharacterPositionForOffset(9));
$this->expectLineCharacterPositionEquals(4, 1, $map->getLineCharacterPositionForOffset(12));
$this->expectLineCharacterPositionEquals(1, 4, $map->getLineCharacterPositionForOffset(3));
}
/**
* The map keeps the current offset and line -
* Move the requests forward, backwards, and to the same position as the previous request to verify that this is done properly.
*/
public function testLineNumber() {
$map = new FilePositionMap("foo\n\nbar\n");
$this->assertSame(1, $map->getLineNumberForOffset(0));
$this->assertSame(1, $map->getLineNumberForOffset(-1));
$this->assertSame(1, $map->getLineNumberForOffset(2));
$this->assertSame(1, $map->getLineNumberForOffset(3));
$this->assertSame(2, $map->getLineNumberForOffset(4));
$this->assertSame(3, $map->getLineNumberForOffset(5));
$this->assertSame(3, $map->getLineNumberForOffset(8));
$this->assertSame(3, $map->getLineNumberForOffset(8));
$this->assertSame(4, $map->getLineNumberForOffset(9));
$this->assertSame(4, $map->getLineNumberForOffset(12));
$this->assertSame(1, $map->getLineNumberForOffset(3));
$this->assertSame(2, $map->getLineNumberForOffset(4));
}
}