зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1551916
- Optimize column-number computations for offsets more than |ColumnChunkLength = 128| code units into a line by saving column information at 128-unit increments (rounded down to the nearest code point start) so that at most (length of... r=arai
...longest code point encoding - 1) + ColumnChunkLength - 1 units must be observed when computing a column number. r=arai Depends on D31301 Differential Revision: https://phabricator.services.mozilla.com/D31302 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
069de399d8
Коммит
d18c7116eb
|
@ -495,6 +495,9 @@ TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
|
|||
const ReadOnlyCompileOptions& options,
|
||||
StrictModeGetter* smg)
|
||||
: srcCoords(cx, options.lineno, options.scriptSourceOffset),
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
longLineColumnInfo_(cx),
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
options_(options),
|
||||
tokens(),
|
||||
cursor_(0),
|
||||
|
@ -689,15 +692,210 @@ inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
|
|||
|
||||
#endif // DEBUG
|
||||
|
||||
template <typename Unit>
|
||||
static size_t ComputeColumn(const Unit* begin, const Unit* end) {
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
return unicode::CountCodePoints(begin, end);
|
||||
#else
|
||||
return PointerRangeSize(begin, end);
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
|
||||
/**
 * Move |*ptr| backward, if necessary, so that it refers to the first code unit
 * of a UTF-8 code point.
 *
 * |limit| is treated as a known code point boundary: when |*ptr| already
 * equals |limit| nothing is inspected and nothing changes.  Otherwise |*ptr|
 * is stepped back over every UTF-8 trailing unit it currently refers to.
 */
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const Utf8Unit** ptr, const Utf8Unit* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // Only a pointer strictly before |limit| can sit inside a code point.
  if (MOZ_LIKELY(*ptr != limit)) {
    const Utf8Unit* cursor = *ptr;

#  ifdef DEBUG
    size_t unitsRewound = 0;
#  endif

    // Walk backward while still looking at a trailing (non-lead) unit.
    while (MOZ_UNLIKELY(IsTrailingUnit(*cursor))) {
      cursor--;
#  ifdef DEBUG
      unitsRewound++;
#  endif
    }

    *ptr = cursor;

    MOZ_ASSERT(unitsRewound < 4,
               "the longest UTF-8 code point is four units, so this should never "
               "retract more than three units");
  }
}
|
||||
|
||||
/**
 * Move |*ptr| backward by one unit iff it refers to the trail half of a
 * UTF-16 surrogate pair, so that it ends up on a code point boundary.
 *
 * |limit| is treated as a known code point boundary: when |*ptr| already
 * equals |limit| nothing is inspected and nothing changes.
 */
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const char16_t** ptr, const char16_t* limit) {
  MOZ_ASSERT(*ptr <= limit);

  const char16_t* const here = *ptr;

  // Nothing to do at |limit| itself.
  if (MOZ_UNLIKELY(here == limit)) {
    return;
  }

  // A unit that isn't a trail surrogate can't be the second half of a pair.
  if (MOZ_LIKELY(!unicode::IsTrailSurrogate(here[0]))) {
    return;
  }

  // A trail surrogate only belongs to a two-unit code point when a lead
  // surrogate immediately precedes it.  A lone trail surrogate (garbage
  // WTF-16, which in practice only test suites produce) is its own code point
  // and is left alone.
  if (MOZ_LIKELY(unicode::IsLeadSurrogate(here[-1]))) {
    *ptr = here - 1;
  }
}
|
||||
|
||||
template <typename Unit>
|
||||
uint32_t TokenStreamAnyChars::computePartialColumn(
|
||||
const LineToken lineToken, const uint32_t offset,
|
||||
const SourceUnits<Unit>& sourceUnits) const {
|
||||
lineToken.assertConsistentOffset(offset);
|
||||
|
||||
const uint32_t line = lineNumber(lineToken);
|
||||
const uint32_t start = srcCoords.lineStart(lineToken);
|
||||
|
||||
// Reset the previous offset/column cache for this line, if the previous
|
||||
// lookup wasn't on this line.
|
||||
if (line != lineOfLastColumnComputation_) {
|
||||
lineOfLastColumnComputation_ = line;
|
||||
lastChunkVectorForLine_ = nullptr;
|
||||
lastOffsetOfComputedColumn_ = start;
|
||||
lastComputedColumn_ = 0;
|
||||
}
|
||||
|
||||
// Compute and return the final column number from a partial offset/column,
|
||||
// using the last-cached offset/column if they're more optimal.
|
||||
auto ColumnFromPartial = [this, offset, &sourceUnits](uint32_t partialOffset,
|
||||
uint32_t partialCols) {
|
||||
MOZ_ASSERT(partialOffset <= offset);
|
||||
|
||||
// If the last lookup on this line was closer to |offset|, use it.
|
||||
if (partialOffset < this->lastOffsetOfComputedColumn_ &&
|
||||
this->lastOffsetOfComputedColumn_ <= offset) {
|
||||
partialOffset = this->lastOffsetOfComputedColumn_;
|
||||
partialCols = this->lastComputedColumn_;
|
||||
}
|
||||
|
||||
const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
|
||||
const Unit* end = sourceUnits.codeUnitPtrAt(offset);
|
||||
|
||||
partialOffset += PointerRangeSize(begin, end);
|
||||
partialCols += AssertedCast<uint32_t>(unicode::CountCodePoints(begin, end));
|
||||
|
||||
this->lastOffsetOfComputedColumn_ = partialOffset;
|
||||
this->lastComputedColumn_ = partialCols;
|
||||
return partialCols;
|
||||
};
|
||||
|
||||
const uint32_t offsetInLine = offset - start;
|
||||
|
||||
// The index within a relevant |Vector<uint32_t>| of the nearest chunk
|
||||
// info...if it's been computed at all.
|
||||
const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
|
||||
|
||||
// Compute the column from the start of the line if chunk information would
|
||||
// direct us to the start of the line -- including if the line's too short to
|
||||
// be chunked.
|
||||
if (chunkIndex == 0) {
|
||||
return ColumnFromPartial(start, 0);
|
||||
}
|
||||
|
||||
// If this line has no chunk vector yet, insert one in the hash map. (The
|
||||
// required index is allocated and filled further down.)
|
||||
if (!lastChunkVectorForLine_) {
|
||||
auto ptr = longLineColumnInfo_.lookupForAdd(line);
|
||||
if (!ptr) {
|
||||
// This could rehash and invalidate a cached vector pointer, but the outer
|
||||
// condition means we don't have a cached pointer.
|
||||
if (!longLineColumnInfo_.add(ptr, line, Vector<uint32_t>(cx))) {
|
||||
// In case of OOM, just count columns from the start of the line.
|
||||
cx->recoverFromOutOfMemory();
|
||||
return ColumnFromPartial(start, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Note that adding elements to this vector won't invalidate this pointer.
|
||||
lastChunkVectorForLine_ = &ptr->value();
|
||||
}
|
||||
|
||||
const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
|
||||
|
||||
auto RetractedOffsetOfChunk = [
|
||||
# ifdef DEBUG
|
||||
this,
|
||||
# endif
|
||||
start, limit,
|
||||
&sourceUnits](uint32_t index) {
|
||||
MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
|
||||
|
||||
uint32_t naiveOffset = start + index * ColumnChunkLength;
|
||||
const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
|
||||
|
||||
const Unit* actualPtr = naivePtr;
|
||||
RetractPointerToCodePointBoundary(&actualPtr, limit);
|
||||
|
||||
return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
|
||||
};
|
||||
|
||||
uint32_t partialOffset;
|
||||
uint32_t partialColumn;
|
||||
|
||||
auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
|
||||
if (entriesLen <= chunkIndex) {
|
||||
// Extend the vector from its last entry or the start of the line. (This is
|
||||
// also a suitable partial start point if we must recover from OOM.)
|
||||
if (entriesLen > 0) {
|
||||
partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
|
||||
partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1];
|
||||
} else {
|
||||
partialOffset = start;
|
||||
partialColumn = 0;
|
||||
}
|
||||
|
||||
if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
|
||||
// As earlier, just start from the greatest offset/column in case of OOM.
|
||||
cx->recoverFromOutOfMemory();
|
||||
return ColumnFromPartial(partialOffset, partialColumn);
|
||||
}
|
||||
|
||||
// OOM is no longer possible now. \o/
|
||||
|
||||
// The vector always begins with the column of the line start, i.e. zero.
|
||||
if (entriesLen == 0) {
|
||||
lastChunkVectorForLine_->infallibleAppend(0);
|
||||
entriesLen++;
|
||||
}
|
||||
|
||||
do {
|
||||
const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
|
||||
const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
|
||||
start + std::min(entriesLen * ColumnChunkLength, offsetInLine));
|
||||
|
||||
MOZ_ASSERT(begin < chunkLimit);
|
||||
MOZ_ASSERT(chunkLimit <= limit);
|
||||
|
||||
static_assert(ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength,
|
||||
"chunk length in code units must be able to contain the "
|
||||
"largest encoding of a code point, for retracting below to "
|
||||
"never underflow");
|
||||
|
||||
// Prior tokenizing ensured that [begin, limit) is validly encoded, and
|
||||
// |begin < chunkLimit|, so any retraction here can't underflow.
|
||||
RetractPointerToCodePointBoundary(&chunkLimit, limit);
|
||||
|
||||
MOZ_ASSERT(begin < chunkLimit);
|
||||
MOZ_ASSERT(chunkLimit <= limit);
|
||||
|
||||
partialOffset += PointerRangeSize(begin, chunkLimit);
|
||||
partialColumn += unicode::CountCodePoints(begin, chunkLimit);
|
||||
|
||||
lastChunkVectorForLine_->infallibleAppend(partialColumn);
|
||||
entriesLen++;
|
||||
} while (entriesLen < chunkIndex + 1);
|
||||
} else {
|
||||
partialOffset = RetractedOffsetOfChunk(chunkIndex);
|
||||
partialColumn = (*lastChunkVectorForLine_)[chunkIndex];
|
||||
}
|
||||
|
||||
return ColumnFromPartial(partialOffset, partialColumn);
|
||||
}
|
||||
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
|
||||
template <typename Unit, class AnyCharsAccess>
|
||||
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
|
||||
LineToken lineToken, uint32_t offset) const {
|
||||
|
@ -705,26 +903,14 @@ uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
|
|||
|
||||
const TokenStreamAnyChars& anyChars = anyCharsAccess();
|
||||
|
||||
uint32_t lineNumber = anyChars.srcCoords.lineNumber(lineToken);
|
||||
uint32_t partialCols =
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
anyChars.computePartialColumn(lineToken, offset, this->sourceUnits)
|
||||
#else
|
||||
offset - anyChars.lineStart(lineToken)
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
;
|
||||
|
||||
uint32_t beginOffset;
|
||||
uint32_t partialCols;
|
||||
if (lineNumber == lastLineForColumn_ && lastOffsetForColumn_ <= offset) {
|
||||
beginOffset = lastOffsetForColumn_;
|
||||
partialCols = lastColumn_;
|
||||
} else {
|
||||
beginOffset = anyChars.lineStart(lineToken);
|
||||
partialCols = 0;
|
||||
}
|
||||
|
||||
const Unit* begin = this->sourceUnits.codeUnitPtrAt(beginOffset);
|
||||
const Unit* end = this->sourceUnits.codeUnitPtrAt(offset);
|
||||
|
||||
partialCols += AssertedCast<uint32_t>(ComputeColumn(begin, end));
|
||||
|
||||
lastLineForColumn_ = lineNumber;
|
||||
lastOffsetForColumn_ = offset;
|
||||
lastColumn_ = partialCols;
|
||||
return (lineToken.isFirstLine() ? anyChars.options_.column : 0) + partialCols;
|
||||
}
|
||||
|
||||
|
|
|
@ -207,6 +207,7 @@
|
|||
#include "frontend/Token.h"
|
||||
#include "frontend/TokenKind.h"
|
||||
#include "js/CompileOptions.h"
|
||||
#include "js/HashTable.h" // js::HashMap
|
||||
#include "js/RegExpFlags.h" // JS::RegExpFlags
|
||||
#include "js/UniquePtr.h"
|
||||
#include "js/Vector.h"
|
||||
|
@ -318,6 +319,9 @@ class MOZ_STACK_CLASS TokenStreamPosition final {
|
|||
Token lookaheadTokens[TokenStreamShared::maxLookahead];
|
||||
} JS_HAZ_ROOTED;
|
||||
|
||||
template <typename Unit>
|
||||
class SourceUnits;
|
||||
|
||||
// Column numbers *ought* be in terms of counts of code points, but in the past
|
||||
// we counted code units. Set this to 0 to keep returning counts of code units
|
||||
// (even for UTF-8, which is clearly wrong, but we don't ship UTF-8 yet so this
|
||||
|
@ -601,6 +605,8 @@ class TokenStreamAnyChars : public TokenStreamShared {
|
|||
|
||||
/** Return the offset of the start of the line for |lineToken|. */
|
||||
uint32_t lineStart(LineToken lineToken) const {
  // The entry *after* this line's must already be recorded as well.
  const auto lineIndex = lineToken.index;
  MOZ_ASSERT(lineIndex + 1 < lineStartOffsets_.length(),
             "recorded line-start information must be available");
  return lineStartOffsets_[lineIndex];
}
|
||||
};
|
||||
|
@ -639,6 +645,64 @@ class TokenStreamAnyChars : public TokenStreamShared {
|
|||
MOZ_ALWAYS_INLINE void updateFlagsForEOL() { flags.isDirtyLine = false; }
|
||||
|
||||
private:
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
/**
|
||||
* Compute the "partial" column number in Unicode code points of the absolute
|
||||
* |offset| within source text on the line of |lineToken| (which must have
|
||||
* been computed from |offset|).
|
||||
*
|
||||
* A partial column number on a line that isn't the first line is just the
|
||||
* actual column number. But a partial column number on the first line is the
|
||||
* column number *ignoring the initial line/column of the script*. For
|
||||
* example, consider this HTML with line/column number keys:
|
||||
*
|
||||
* 1 2 3
|
||||
* 0123456789012345678901234 567890
|
||||
* ------------------------------------
|
||||
* 1 | <html>
|
||||
* 2 | <head>
|
||||
* 3 |   <script>var x = 3;  x &lt; 4;
|
||||
* 4 | const y = 7;</script>
|
||||
* 5 | </head>
|
||||
* 6 | <body></body>
|
||||
* 7 | </html>
|
||||
*
|
||||
* The script would be compiled specifying initial (line, column) of (3, 10)
|
||||
* using |JS::ReadOnlyCompileOptions::{lineno,column}|. And the column
|
||||
* reported by |computeColumn| for the "v" of |var| would be 10. But the
|
||||
* partial column number of the "v" in |var|, that this function returns,
|
||||
* would be 0. On the other hand, the column reported by |computeColumn| and
|
||||
* the partial column number returned by this function for the "c" in |const|
|
||||
* would both be 0, because it's not in the first line of source text.
|
||||
*
|
||||
* The partial column is with respect *only* to the JavaScript source text as
|
||||
* SpiderMonkey sees it. In the example, the "&lt;" is converted to "<" by
|
||||
* the browser before SpiderMonkey would see it. So the partial column of the
|
||||
* "4" in the inequality would be 16, not 19.
|
||||
*
|
||||
* Code points are not all equal length, so counting requires *some* kind of
|
||||
* linear-time counting from the start of the line. This function attempts
|
||||
* various tricks to reduce this cost. If these optimizations succeed,
|
||||
* repeated calls to this function on a line will pay a one-time cost linear
|
||||
* in the length of the line, then each call pays a separate constant-time
|
||||
* cost. If the optimizations do not succeed, this function works in time
|
||||
* linear in the length of the line.
|
||||
*
|
||||
* It's unusual for a function in *this* class to be |Unit|-templated, but
|
||||
* while this operation manages |Unit|-agnostic fields in this class and in
|
||||
* |srcCoords|, it must *perform* |Unit|-sensitive computations to fill them.
|
||||
* And this is the best place to do that.
|
||||
*/
|
||||
template <typename Unit>
|
||||
uint32_t computePartialColumn(const LineToken lineToken,
|
||||
const uint32_t offset,
|
||||
const SourceUnits<Unit>& sourceUnits) const;
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
|
||||
/**
|
||||
* Update line/column information for the start of a new line at
|
||||
* |lineStartOffset|.
|
||||
*/
|
||||
MOZ_MUST_USE MOZ_ALWAYS_INLINE bool internalUpdateLineInfoForEOL(
|
||||
uint32_t lineStartOffset);
|
||||
|
||||
|
@ -686,6 +750,22 @@ class TokenStreamAnyChars : public TokenStreamShared {
|
|||
|
||||
const char* getFilename() const { return filename_; }
|
||||
|
||||
private:
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
static constexpr uint32_t ColumnChunkLength = 128;
|
||||
|
||||
/**
|
||||
* Line number (of lines at least |ColumnChunkLength| code units long) to
|
||||
* a sequence of the column numbers at |ColumnChunkLength| boundaries rewound
|
||||
* (if needed) to the nearest code point boundary.
|
||||
*
|
||||
* Entries appear in this map only when a column computation of sufficient
|
||||
* distance is performed on a line, and the vectors are lazily filled as
|
||||
* greater offsets within lines require column computations.
|
||||
*/
|
||||
mutable HashMap<uint32_t, Vector<uint32_t>> longLineColumnInfo_;
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
|
||||
protected:
|
||||
// Options used for parsing/tokenizing.
|
||||
const JS::ReadOnlyCompileOptions& options_;
|
||||
|
@ -730,6 +810,29 @@ class TokenStreamAnyChars : public TokenStreamShared {
|
|||
JSContext* const cx;
|
||||
bool mutedErrors;
|
||||
StrictModeGetter* strictModeGetter; // used to test for strict mode
|
||||
|
||||
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
// Computing accurate column numbers requires at *some* point linearly
|
||||
// iterating through prior source units in the line to properly account for
|
||||
// multi-unit code points. This is quadratic if counting happens repeatedly.
|
||||
//
|
||||
// But usually we need columns for advancing offsets through scripts. By
|
||||
// caching the last ((line number, offset) => relative column) mapping (in
|
||||
// similar manner to how |SourceUnits::lastIndex_| is used to cache
|
||||
// (offset => line number) mappings) we can usually avoid re-iterating through
|
||||
// the common line prefix.
|
||||
//
|
||||
// Additionally, we avoid hash table lookup costs by caching the
|
||||
// |Vector<uint32_t>*| for the line of the last lookup. (|nullptr| means we
|
||||
// have to look it up -- or it hasn't been created yet.) This pointer is
|
||||
// invalidated when a lookup on a new line occurs, but as it's not a pointer
|
||||
// at literal element data, it's *not* invalidated when new entries are added
|
||||
// to such a vector.
|
||||
mutable uint32_t lineOfLastColumnComputation_ = UINT32_MAX;
|
||||
mutable Vector<uint32_t>* lastChunkVectorForLine_ = nullptr;
|
||||
mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX;
|
||||
mutable uint32_t lastComputedColumn_ = 0;
|
||||
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
|
||||
};
|
||||
|
||||
constexpr char16_t CodeUnitValue(char16_t unit) { return unit; }
|
||||
|
@ -1677,22 +1780,14 @@ class GeneralTokenStreamChars : public SpecializedTokenStreamCharsBase<Unit> {
|
|||
return static_cast<TokenStreamSpecific*>(this);
|
||||
}
|
||||
|
||||
private:
|
||||
// Computing accurate column numbers requires linearly iterating through all
|
||||
// source units in the line to account for multi-unit code points; on long
|
||||
// lines requiring many column computations, this becomes quadratic.
|
||||
//
|
||||
// However, because usually we need columns for advancing offsets through
|
||||
// scripts, caching the last ((line number, offset) => relative column)
|
||||
// mapping -- in similar manner to how |SourceUnits::lastIndex_| is used to
|
||||
// cache (offset => line number) mappings -- lets us avoid re-iterating
|
||||
// through the line prefix in most cases.
|
||||
|
||||
mutable uint32_t lastLineForColumn_ = UINT32_MAX;
|
||||
mutable uint32_t lastOffsetForColumn_ = UINT32_MAX;
|
||||
mutable uint32_t lastColumn_ = 0;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Compute the column number in Unicode code points of the absolute |offset|
|
||||
* within source text on the line corresponding to |lineToken|.
|
||||
*
|
||||
* |offset| must be a code point boundary, preceded only by validly-encoded
|
||||
* source units. (It doesn't have to be *followed* by valid source units.)
|
||||
*/
|
||||
uint32_t computeColumn(LineToken lineToken, uint32_t offset) const;
|
||||
void computeLineAndColumn(uint32_t offset, uint32_t* line,
|
||||
uint32_t* column) const;
|
||||
|
|
|
@ -0,0 +1,403 @@
|
|||
// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
* http://creativecommons.org/licenses/publicdomain/
|
||||
*/
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
var BUGNUMBER = 1551916;
|
||||
var summary =
|
||||
"Optimize computing a column number as count of code points by caching " +
|
||||
"column numbers (and whether each chunk might contain anything multi-unit) " +
|
||||
"and counting forward from them";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
/**************
|
||||
* BEGIN TEST *
|
||||
**************/
|
||||
|
||||
// Various testing of column-number computations, with respect to counting as
|
||||
// code points or units, for very long lines.
|
||||
//
|
||||
// This test should be valid regardless whether
|
||||
// |JS_COLUMN_DIMENSION_IS_CODE_POINTS()| is 0 or 1. It also *should* pass no
|
||||
// matter what valid value |TokenStreamAnyChars::ColumnChunkLength| takes (it
|
||||
// must be at least 4 so that the maximum-length code point in UTF-8/16 will fit
|
||||
// in a single chunk), because the value of that constant should be externally
|
||||
// invisible save for perf effects. (As a result, recompiling and running this
|
||||
// test with a variety of different values assigned to that constant is a good
|
||||
// smoke-test of column number computation, that doesn't require having to
|
||||
// closely inspect any column-related code.)
|
||||
//
|
||||
// However, this test is structured on the assumption that that constant has the
|
||||
// value 128, in order to exercise in targeted fashion various column number
|
||||
// computation edge cases.
|
||||
//
|
||||
// All this testing *could* be written to not be done with |Reflect.parse| --
|
||||
// backwards column computations happen even when compiling normal code, in some
|
||||
// cases. But it's much more the exception than the rule. And |Reflect.parse|
|
||||
// has *very* predictable column-computation operations (basically start/end
|
||||
// coordinates are computed immediately when the end of an AST node is reached)
|
||||
// that make it easier to recognize what the exact pattern of computations for
|
||||
// which offsets will look like.
|
||||
|
||||
// Helper function for checking node location tersely.
|
||||
// Assert that |node.loc| starts at |expectedStart| and ends at |expectedEnd|,
// each given as a two-element [line, column] array.
function checkLoc(node, expectedStart, expectedEnd)
{
  let { start, end } = node.loc;
  let [startLine, startColumn] = expectedStart;
  let [endLine, endColumn] = expectedEnd;

  assertEq(start.line, startLine,
           "start line number must be as expected");
  assertEq(start.column, startColumn,
           "start column number must be as expected");

  assertEq(end.line, endLine, "end line number must be as expected");
  assertEq(end.column, endColumn,
           "end column number must be as expected");
}
|
||||
|
||||
// Count the code points in |str|.  Iterating a string yields one entry per
// code point (a surrogate pair is a single entry), unlike |str.length|, which
// counts UTF-16 code units.
function lengthInCodePoints(str)
{
  let count = 0;
  for (let _codePoint of str)
    count++;
  return count;
}
|
||||
|
||||
// True if column numbers are counts of code points, false otherwise. This
|
||||
// constant can be used to short-circuit testing that isn't point/unit-agnostic.
|
||||
const columnsAreCodePoints = (function()
{
  // One letter per probed end column: "p" if it matched the code point count,
  // "u" if it matched the code unit count, "x" if it matched neither.
  var observed = [];

  function classify(actual, expectedPoints, expectedUnits)
  {
    var tag = "x";
    if (actual === expectedPoints)
      tag = "p";
    else if (actual === expectedUnits)
      tag = "u";
    observed.push(tag);
  }

  // Every probed node sits on line 1 and starts at column 0; only the end
  // column depends on how columns are counted.
  function probeLoc(node, endInPoints, endInUnits)
  {
    assertEq(node.loc.start.line, 1);
    assertEq(node.loc.end.line, 1);
    assertEq(node.loc.start.column, 0);
    classify(node.loc.end.column, endInPoints, endInUnits);
  }

  // Four non-BMP characters: each is one code point but two UTF-16 units.
  var program = Reflect.parse('"😱😱😱😱";', { loc: true });
  assertEq(program.type, "Program");
  probeLoc(program, 7, 11);

  var statements = program.body;
  assertEq(statements.length, 1);

  var statement = statements[0];
  assertEq(statement.type, "ExpressionStatement");
  probeLoc(statement, 7, 11);

  var literal = statement.expression;
  assertEq(literal.type, "Literal");
  assertEq(literal.value, "😱😱😱😱");
  probeLoc(literal, 6, 10);

  var checkResult = observed.join(",");
  var allPoints = checkResult === "p,p,p";

  assertEq(allPoints || checkResult === "u,u,u", true,
           "columns must be wholly code points or units: " + checkResult);

  return allPoints;
})();
|
||||
|
||||
// Start with some basic sanity-testing, without regard to exactly when, how, or
|
||||
// in what order (offset => column) computations are performed.
|
||||
function testSimple()
{
  if (!columnsAreCodePoints)
    return;

  // The source text below declares |Q| as an array of forty one-element
  // arrays.  Element |i| holds the string "😱😱#x", where "#" is the character
  // whose code is that of "(" plus |i|.
  const expectedString =
    i => "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x";

  // The text is written in pieces of ten code points each so the column and
  // UTF-16 offset at which every piece begins can be noted alongside it.
  let pieces =
    [
      'var Q = [[', // column 0, offset 0
      // REPEAT
      '"😱😱(x"],["', // column 10, offset 10
      '😱😱)x"],["😱', // column 20, offset 22
      '😱*x"],["😱😱', // column 30, offset 35
      '+x"],["😱😱,', // column 40, offset 48
      'x"],["😱😱-x', // column 50, offset 60
      '"],["😱😱.x"', // column 60, offset 72
      '],["😱😱/x"]', // column 70, offset 84
      ',["😱😱0x"],', // column 80, offset 96
      '["😱😱1x"],[', // column 90, offset 108
      // REPEAT
      '"😱😱2x"],["', // column 100, offset 120 -- chunk limit between "]
      '😱😱3x"],["😱', // column 110, offset 132
      '😱4x"],["😱😱', // column 120, offset 145
      '5x"],["😱😱6', // column 130, offset 158
      'x"],["😱😱7x', // column 140, offset 170
      '"],["😱😱8x"', // column 150, offset 182
      '],["😱😱9x"]', // column 160, offset 194
      ',["😱😱:x"],', // column 170, offset 206
      '["😱😱;x"],[', // column 180, offset 218
      // REPEAT
      '"😱😱<x"],["', // column 190, offset 230
      '😱😱=x"],["😱', // column 200, offset 242
      '😱>x"],["😱😱', // column 210, offset 255 -- chunk limit splits first 😱
      '?x"],["😱😱@', // column 220, offset 268
      'x"],["😱😱Ax', // column 230, offset 280
      '"],["😱😱Bx"', // column 240, offset 292
      '],["😱😱Cx"]', // column 250, offset 304
      ',["😱😱Dx"],', // column 260, offset 316
      '["😱😱Ex"],[', // column 270, offset 328
      // REPEAT
      '"😱😱Fx"],["', // column 280, offset 340
      '😱😱Gx"],["😱', // column 290, offset 352
      '😱Hx"],["😱😱', // column 300, offset 365
      'Ix"],["😱😱J', // column 310, offset 378 -- chunk limit between ["
      'x"],["😱😱Kx', // column 320, offset 390
      '"],["😱😱Lx"', // column 330, offset 402
      '],["😱😱Mx"]', // column 340, offset 414
      ',["😱😱Nx"],', // column 350, offset 426
      '["😱😱Ox"]];', // column 360 (10 long), offset 438 (+12 to end)
    ];
  let source = pieces.join("");

  // Total code points in |source|.  The text spans several
  // |TokenStreamAnyChars::ColumnChunkLength = 128| chunks: more than one, so
  // the long-line path is taken at all, and more than two, so the per-line
  // vector has to be extended after it is first created.
  const CodePointLength = 370;
  assertEq(lengthInCodePoints(source), CodePointLength,
           "code point count should be correct");

  // |pieces| contains this many REPEAT-delimited cycles.
  const RepetitionNumber = 4;

  // Each cycle consists of this many elements.
  const ElementsPerCycle = 9;

  // Each element in a cycle has at least this many 😱.
  const MinFaceScreamingPerElementInCycle = 2;

  // Each cycle contains this many elements with three 😱.
  const ElementsInCycleWithThreeFaceScreaming = 2;

  // Every 😱 costs one UTF-16 unit beyond its one code point.  (UTF-16 because
  // the input is a JS string.)
  const ExtraUnitsPerCycle =
    ElementsPerCycle * MinFaceScreamingPerElementInCycle +
    ElementsInCycleWithThreeFaceScreaming;
  const OverallCodeUnitCount =
    CodePointLength + RepetitionNumber * ExtraUnitsPerCycle;

  // Code units != code points.
  assertEq(OverallCodeUnitCount > CodePointLength, true,
           "string contains code points outside BMP, so length in units " +
           "exceeds length in points");

  // The computed unit count has this exact value...
  assertEq(OverallCodeUnitCount, 450,
           "code unit count computation produces this value");

  // ...and agrees with the actual string length.
  assertEq(source.length, OverallCodeUnitCount, "string length must match");

  // Run the text.  The direct eval assigns the local |Q| declared here.
  var Q;
  eval(source);

  // Check what executing it produced.
  assertEq(Array.isArray(Q), true);

  const NumArrayElements = 40;
  assertEq(Q.length, NumArrayElements);
  for (let i = 0; i < Q.length; i++)
  {
    let inner = Q[i];
    assertEq(Array.isArray(inner), true);
    assertEq(inner.length, 1);
    assertEq(inner[0], expectedString(i));
  }

  let program = Reflect.parse(source, { loc: true });

  // The script as a whole.
  assertEq(program.type, "Program");
  checkLoc(program, [1, 0], [1, 370]);
  assertEq(program.body.length, 1);

  // The |var| declaration.
  let declaration = program.body[0];
  assertEq(declaration.type, "VariableDeclaration");
  checkLoc(declaration, [1, 0], [1, 369]);

  // Its initializer, the outer array literal.
  let outerArray = declaration.declarations[0].init;
  assertEq(outerArray.type, "ArrayExpression");
  checkLoc(outerArray, [1, 8], [1, 369]);

  // Finally each nested array and the string literal inside it.
  assertEq(outerArray.elements.length, NumArrayElements, "array literal length");

  const ItemLength = lengthInCodePoints('["😱😱#x"],');
  assertEq(ItemLength, 9, "item length check");

  outerArray.elements.slice(0, NumArrayElements).forEach((nested, i) => {
    assertEq(nested.type, "ArrayExpression");

    // The first nested array opens at column 9; each item is |ItemLength|
    // columns wide including its trailing comma.
    let firstCol = 9 + i * ItemLength;
    let lastCol = firstCol + ItemLength - 1;
    checkLoc(nested, [1, firstCol], [1, lastCol]);

    let contents = nested.elements;
    assertEq(contents.length, 1);

    let literal = contents[0];
    assertEq(literal.type, "Literal");
    assertEq(literal.value, expectedString(i));
    checkLoc(literal, [1, firstCol + 1], [1, lastCol - 1]);
  });
}
|
||||
testSimple();
|
||||
|
||||
// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
|
||||
// it should be observable, precisely, but effects of mis-applying or
|
||||
// miscomputing it would in principle be observable if such were happening.
|
||||
// This test also is intended to be useful for (manually, in a debugger)
|
||||
// verifying that the optimization is computed and kicks in correctly.
|
||||
function testGuaranteedSingleUnit()
|
||||
{
|
||||
// Every expected column below is a code-point count (see the
// lengthInCodePoints assertions), so bail out when columnsAreCodePoints
// (defined outside this function) is falsy.
if (!columnsAreCodePoints)
|
||||
return;
|
||||
|
||||
// Begin a few array literals in a first chunk to test column computation in
|
||||
// that first chunk.
|
||||
//
|
||||
// End some of them in the first chunk to test columns *before* we know we
|
||||
// have a long line.
|
||||
//
|
||||
// End one array *outside* the first chunk to test a computation inside a
|
||||
// first chunk *after* we know we have a long line and have computed a first
|
||||
// chunk.
|
||||
let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0
|
||||
assertEq(mixedChunksCode.length, 15);
|
||||
assertEq(lengthInCodePoints(mixedChunksCode), 15);
|
||||
|
||||
mixedChunksCode +=
|
||||
" ".repeat(128 - mixedChunksCode.length); // column 15, offset 15
|
||||
assertEq(mixedChunksCode.length, 128);
|
||||
assertEq(lengthInCodePoints(mixedChunksCode), 128);
|
||||
|
||||
// Fill out a second chunk as also single-unit, with an outer array literal
|
||||
// that begins in this chunk but finishes in the next (to test column
|
||||
// computation in a prior, guaranteed-single-unit chunk).
|
||||
mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128
|
||||
assertEq(mixedChunksCode.length, 256);
|
||||
assertEq(lengthInCodePoints(mixedChunksCode), 256);
|
||||
|
||||
// Add a third chunk with one last empty nested array literal (so that we
|
||||
// tack on another chunk, and conclude the second chunk is single-unit, before
|
||||
// closing the enclosing array literal). Then close the enclosing array
|
||||
// literal. Finally start a new string literal element containing
|
||||
// multi-unit code points. For good measure, make the chunk *end* in the
|
||||
// middle of such a code point, so that the relevant chunk limit must be
|
||||
// retracted one code unit.
|
||||
mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256
|
||||
assertEq(mixedChunksCode.length, 384 + 1);
|
||||
assertEq(lengthInCodePoints(mixedChunksCode), 324);
|
||||
|
||||
// Wrap things up. Terminate the string, then terminate the nested array
|
||||
// literal to trigger a column computation within the first chunk that can
|
||||
// benefit from knowing the first chunk is all single-unit. Next add a *new*
|
||||
// element to the outermost array, a string literal that contains a line
|
||||
// terminator. The terminator invalidates the column computation cache, so
|
||||
// when the outermost array is closed, location info for it will not hit the
|
||||
// cache. Finally, tack on the terminating semicolon for good measure.
|
||||
mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385
|
||||
assertEq(mixedChunksCode.length, 396);
|
||||
assertEq(lengthInCodePoints(mixedChunksCode), 335);
|
||||
|
||||
// Parse with |loc: true|; presumably this is what supplies the location
// info that checkLoc (defined outside this view) compares against.
let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
|
||||
|
||||
// Check the overall script.
|
||||
assertEq(parseTree.type, "Program");
|
||||
// The U+2028 inside the last string literal is the only line terminator in
// the source, which is why the end locations below are on line 2.
checkLoc(parseTree, [1, 0], [2, 4]);
|
||||
assertEq(parseTree.body.length, 1);
|
||||
|
||||
// Check the coordinates of the declaration.
|
||||
let varDecl = parseTree.body[0];
|
||||
assertEq(varDecl.type, "VariableDeclaration");
|
||||
checkLoc(varDecl, [1, 0], [2, 3]);
|
||||
|
||||
// ...and its initializing expression.
|
||||
let varInit = varDecl.declarations[0].init;
|
||||
assertEq(varInit.type, "ArrayExpression");
|
||||
checkLoc(varInit, [1, 8], [2, 3]);
|
||||
|
||||
let outerArrayElements = varInit.elements;
|
||||
assertEq(outerArrayElements.length, 2);
|
||||
|
||||
{
|
||||
// Next the first element, the array inside the initializing expression.
|
||||
let nestedArray = varInit.elements[0];
|
||||
assertEq(nestedArray.type, "ArrayExpression");
|
||||
checkLoc(nestedArray, [1, 10], [1, 327]);
|
||||
|
||||
// Now inside that nested array.
|
||||
let nestedArrayElements = nestedArray.elements;
|
||||
assertEq(nestedArrayElements.length, 3);
|
||||
|
||||
// First the [] in chunk #0
|
||||
let emptyArray = nestedArrayElements[0];
|
||||
assertEq(emptyArray.type, "ArrayExpression");
|
||||
assertEq(emptyArray.elements.length, 0);
|
||||
checkLoc(emptyArray, [1, 12], [1, 14]);
|
||||
|
||||
// Then the big array of empty arrays starting in chunk #1 and ending just
|
||||
// barely in chunk #2.
|
||||
let bigArrayOfEmpties = nestedArrayElements[1];
|
||||
assertEq(bigArrayOfEmpties.type, "ArrayExpression");
|
||||
// 42 elements come from the repeated "[]," in chunk #1; the extra one is the
// "[]" appended at the very start of chunk #2.
assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
|
||||
bigArrayOfEmpties.elements.forEach((elem, i) => {
|
||||
assertEq(elem.type, "ArrayExpression");
|
||||
assertEq(elem.elements.length, 0);
|
||||
if (i !== 42)
|
||||
checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
|
||||
else
|
||||
checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
|
||||
});
|
||||
|
||||
// Then the string literal of multi-unit code points beginning in chunk #2
|
||||
// and ending just into chunk #3 (its expected end is still on line 1).
|
||||
let multiUnitStringLiteral = nestedArrayElements[2];
|
||||
assertEq(multiUnitStringLiteral.type, "Literal");
|
||||
assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
|
||||
checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
|
||||
}
|
||||
|
||||
{
|
||||
// Finally, the string literal containing a line terminator as element in
|
||||
// the outermost array.
|
||||
let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
|
||||
assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
|
||||
assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
|
||||
checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
|
||||
}
|
||||
}
|
||||
// Run the guaranteed-single-unit chunk test immediately.
testGuaranteedSingleUnit();
|
||||
|
||||
// reportCompare is not defined in this view (presumably supplied by the test
// harness); only call it when it exists as a function.
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
|
||||
// Emit a completion message once all top-level test calls have returned.
print("Testing completed");
|
Загрузка…
Ссылка в новой задаче