Bug 1551916 - Optimize column-number computations for offsets more than |ColumnChunkLength = 128| code units into a line by saving column information at 128-unit increments (rounded down to the nearest code point start) so that at most (length of... r=arai

...longest code point encoding - 1) + ColumnChunkLength - 1 units must be observed when computing a column number.  r=arai

Depends on D31301

Differential Revision: https://phabricator.services.mozilla.com/D31302

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Jeff Walden 2019-05-17 03:21:04 +00:00
Родитель 069de399d8
Коммит d18c7116eb
3 изменённых файлов: 724 добавлений и 40 удалений

Просмотреть файл

@ -495,6 +495,9 @@ TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
const ReadOnlyCompileOptions& options,
StrictModeGetter* smg)
: srcCoords(cx, options.lineno, options.scriptSourceOffset),
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
longLineColumnInfo_(cx),
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
options_(options),
tokens(),
cursor_(0),
@ -689,15 +692,210 @@ inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
#endif // DEBUG
template <typename Unit>
static size_t ComputeColumn(const Unit* begin, const Unit* end) {
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
return unicode::CountCodePoints(begin, end);
#else
return PointerRangeSize(begin, end);
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
/**
 * UTF-8 overload: if |*ptr| points into the middle of a multi-unit code point,
 * move it backward to that code point's first unit.
 *
 * |limit| is treated as a known code point boundary, so nothing is inspected
 * when |*ptr == limit|.  No lower bound is checked while stepping backward.
 * NOTE(review): this presumably relies on the caller guaranteeing validly
 * encoded units before |*ptr| -- confirm against callers.
 */
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const Utf8Unit** ptr, const Utf8Unit* limit) {
  const Utf8Unit* cursor = *ptr;
  MOZ_ASSERT(cursor <= limit);

  // Only pointers strictly before |limit| can possibly split a code point.
  if (MOZ_LIKELY(cursor != limit)) {
#  ifdef DEBUG
    size_t stepsBack = 0;
#  endif

    // Step backward over trailing units until a non-trailing unit is reached.
    for (; MOZ_UNLIKELY(IsTrailingUnit(*cursor)); --cursor) {
#  ifdef DEBUG
      stepsBack++;
#  endif
    }

    MOZ_ASSERT(stepsBack < 4,
               "the longest UTF-8 code point is four units, so this should never "
               "retract more than three units");

    *ptr = cursor;
  }
}
/**
 * UTF-16 overload: if |*ptr| points at the trail half of a surrogate pair,
 * move it back one unit so it points at the lead half.
 *
 * |limit| is treated as a known code point boundary, so nothing is inspected
 * when |*ptr == limit|.  A trail surrogate that is *not* preceded by a lead
 * surrogate is left where it is.
 */
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const char16_t** ptr, const char16_t* limit) {
  const char16_t* const unit = *ptr;
  MOZ_ASSERT(unit <= limit);

  // Already at the known boundary: nothing to do.
  if (MOZ_UNLIKELY(unit == limit)) {
    return;
  }

  // Anything but a trail surrogate starts a code point by itself.
  if (MOZ_LIKELY(!unicode::IsTrailSurrogate(unit[0]))) {
    return;
  }

  // A trail surrogate only continues a code point when a lead surrogate
  // immediately precedes it.  Outside test suites feeding garbage WTF-16 that
  // is basically always the case.
  if (MOZ_LIKELY(unicode::IsLeadSurrogate(unit[-1]))) {
    *ptr = unit - 1;
  }
}
// Compute the partial column, as a count of code points, of |offset| on the
// line identified by |lineToken|.
//
// Two layers of caching are consulted and updated along the way:
//
//   * |lastOffsetOfComputedColumn_| / |lastComputedColumn_| remember the most
//     recent (offset, column) answer on the current line, so a later lookup at
//     a greater-or-equal offset can count forward from there.
//   * |longLineColumnInfo_| maps a line number to a vector whose entry |i| is
//     the column at code-unit offset |i * ColumnChunkLength| into the line,
//     with that offset first retracted to a code point boundary.
//     |lastChunkVectorForLine_| caches the vector for the current line.
//
// Offsets fewer than |ColumnChunkLength| units into the line never touch the
// hash map.  If allocating the map entry or growing the vector fails, the OOM
// is recovered from and the column is counted from the best starting point
// already known.
template <typename Unit>
uint32_t TokenStreamAnyChars::computePartialColumn(
const LineToken lineToken, const uint32_t offset,
const SourceUnits<Unit>& sourceUnits) const {
lineToken.assertConsistentOffset(offset);
const uint32_t line = lineNumber(lineToken);
const uint32_t start = srcCoords.lineStart(lineToken);
// Reset the previous offset/column cache for this line, if the previous
// lookup wasn't on this line.
if (line != lineOfLastColumnComputation_) {
lineOfLastColumnComputation_ = line;
lastChunkVectorForLine_ = nullptr;
lastOffsetOfComputedColumn_ = start;
lastComputedColumn_ = 0;
}
// Compute and return the final column number from a partial offset/column,
// using the last-cached offset/column if they're more optimal.
//
// On return the last-offset/last-column cache describes |offset| itself, so
// every exit path below that goes through this lambda refreshes that cache.
auto ColumnFromPartial = [this, offset, &sourceUnits](uint32_t partialOffset,
uint32_t partialCols) {
MOZ_ASSERT(partialOffset <= offset);
// If the last lookup on this line was closer to |offset|, use it.
if (partialOffset < this->lastOffsetOfComputedColumn_ &&
this->lastOffsetOfComputedColumn_ <= offset) {
partialOffset = this->lastOffsetOfComputedColumn_;
partialCols = this->lastComputedColumn_;
}
// Count the code points in the remaining [partialOffset, offset) range.
const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
const Unit* end = sourceUnits.codeUnitPtrAt(offset);
partialOffset += PointerRangeSize(begin, end);
partialCols += AssertedCast<uint32_t>(unicode::CountCodePoints(begin, end));
this->lastOffsetOfComputedColumn_ = partialOffset;
this->lastComputedColumn_ = partialCols;
return partialCols;
};
const uint32_t offsetInLine = offset - start;
// The index within a relevant |Vector<uint32_t>| of the nearest chunk
// info...if it's been computed at all.
const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
// Compute the column from the start of the line if chunk information would
// direct us to the start of the line -- including if the line's too short to
// be chunked.
if (chunkIndex == 0) {
return ColumnFromPartial(start, 0);
}
// If this line has no chunk vector yet, insert one in the hash map. (The
// required index is allocated and filled further down.)
if (!lastChunkVectorForLine_) {
auto ptr = longLineColumnInfo_.lookupForAdd(line);
if (!ptr) {
// This could rehash and invalidate a cached vector pointer, but the outer
// condition means we don't have a cached pointer.
if (!longLineColumnInfo_.add(ptr, line, Vector<uint32_t>(cx))) {
// In case of OOM, just count columns from the start of the line.
cx->recoverFromOutOfMemory();
return ColumnFromPartial(start, 0);
}
}
// Note that adding elements to this vector won't invalidate this pointer.
lastChunkVectorForLine_ = &ptr->value();
}
// |offset| is used as the known code point boundary for every retraction
// performed below.
const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
// Return the code-unit offset at which chunk |index| really begins: the
// naive |start + index * ColumnChunkLength|, moved backward if that position
// would split a code point.  |this| is captured only in DEBUG builds; the
// assertion is the lambda's only use of it.
auto RetractedOffsetOfChunk = [
# ifdef DEBUG
this,
# endif
start, limit,
&sourceUnits](uint32_t index) {
MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
uint32_t naiveOffset = start + index * ColumnChunkLength;
const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
const Unit* actualPtr = naivePtr;
RetractPointerToCodePointBoundary(&actualPtr, limit);
return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
};
uint32_t partialOffset;
uint32_t partialColumn;
auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
if (entriesLen <= chunkIndex) {
// Extend the vector from its last entry or the start of the line. (This is
// also a suitable partial start point if we must recover from OOM.)
if (entriesLen > 0) {
partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1];
} else {
partialOffset = start;
partialColumn = 0;
}
if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
// As earlier, just start from the greatest offset/column in case of OOM.
cx->recoverFromOutOfMemory();
return ColumnFromPartial(partialOffset, partialColumn);
}
// OOM is no longer possible now. \o/
// The vector always begins with the column of the line start, i.e. zero.
if (entriesLen == 0) {
lastChunkVectorForLine_->infallibleAppend(0);
entriesLen++;
}
// Each iteration appends the entry for chunk |entriesLen|: it counts the
// code points between the previous chunk's (retracted) start and this
// chunk's (retracted) start, then records the running column.
do {
const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
// NOTE(review): |entriesLen <= chunkIndex| holds on every iteration, so
// |entriesLen * ColumnChunkLength <= offsetInLine| and the |std::min| looks
// purely defensive.
const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
start + std::min(entriesLen * ColumnChunkLength, offsetInLine));
MOZ_ASSERT(begin < chunkLimit);
MOZ_ASSERT(chunkLimit <= limit);
static_assert(ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength,
"chunk length in code units must be able to contain the "
"largest encoding of a code point, for retracting below to "
"never underflow");
// Prior tokenizing ensured that [begin, limit) is validly encoded, and
// |begin < chunkLimit|, so any retraction here can't underflow.
RetractPointerToCodePointBoundary(&chunkLimit, limit);
MOZ_ASSERT(begin < chunkLimit);
MOZ_ASSERT(chunkLimit <= limit);
partialOffset += PointerRangeSize(begin, chunkLimit);
partialColumn += unicode::CountCodePoints(begin, chunkLimit);
lastChunkVectorForLine_->infallibleAppend(partialColumn);
entriesLen++;
} while (entriesLen < chunkIndex + 1);
} else {
// The needed chunk entry already exists: start counting from it.
partialOffset = RetractedOffsetOfChunk(chunkIndex);
partialColumn = (*lastChunkVectorForLine_)[chunkIndex];
}
// Count whatever remains between the chosen chunk start and |offset|.
return ColumnFromPartial(partialOffset, partialColumn);
}
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
template <typename Unit, class AnyCharsAccess>
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
LineToken lineToken, uint32_t offset) const {
@ -705,26 +903,14 @@ uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
const TokenStreamAnyChars& anyChars = anyCharsAccess();
uint32_t lineNumber = anyChars.srcCoords.lineNumber(lineToken);
uint32_t partialCols =
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
anyChars.computePartialColumn(lineToken, offset, this->sourceUnits)
#else
offset - anyChars.lineStart(lineToken)
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
;
uint32_t beginOffset;
uint32_t partialCols;
if (lineNumber == lastLineForColumn_ && lastOffsetForColumn_ <= offset) {
beginOffset = lastOffsetForColumn_;
partialCols = lastColumn_;
} else {
beginOffset = anyChars.lineStart(lineToken);
partialCols = 0;
}
const Unit* begin = this->sourceUnits.codeUnitPtrAt(beginOffset);
const Unit* end = this->sourceUnits.codeUnitPtrAt(offset);
partialCols += AssertedCast<uint32_t>(ComputeColumn(begin, end));
lastLineForColumn_ = lineNumber;
lastOffsetForColumn_ = offset;
lastColumn_ = partialCols;
return (lineToken.isFirstLine() ? anyChars.options_.column : 0) + partialCols;
}

Просмотреть файл

@ -207,6 +207,7 @@
#include "frontend/Token.h"
#include "frontend/TokenKind.h"
#include "js/CompileOptions.h"
#include "js/HashTable.h" // js::HashMap
#include "js/RegExpFlags.h" // JS::RegExpFlags
#include "js/UniquePtr.h"
#include "js/Vector.h"
@ -318,6 +319,9 @@ class MOZ_STACK_CLASS TokenStreamPosition final {
Token lookaheadTokens[TokenStreamShared::maxLookahead];
} JS_HAZ_ROOTED;
template <typename Unit>
class SourceUnits;
// Column numbers *ought* be in terms of counts of code points, but in the past
// we counted code units. Set this to 0 to keep returning counts of code units
// (even for UTF-8, which is clearly wrong, but we don't ship UTF-8 yet so this
@ -601,6 +605,8 @@ class TokenStreamAnyChars : public TokenStreamShared {
/**
 * Look up the source offset at which the line identified by |lineToken|
 * begins.
 */
uint32_t lineStart(LineToken lineToken) const {
  const auto lineIndex = lineToken.index;

  // The entry *after* this line's must exist too.
  MOZ_ASSERT(lineIndex + 1 < lineStartOffsets_.length(),
             "recorded line-start information must be available");

  return lineStartOffsets_[lineIndex];
}
};
@ -639,6 +645,64 @@ class TokenStreamAnyChars : public TokenStreamShared {
/** Clear the dirty-line flag; called when an end of line is reached. */
MOZ_ALWAYS_INLINE void updateFlagsForEOL() {
  flags.isDirtyLine = false;
}
private:
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
/**
* Compute the "partial" column number in Unicode code points of the absolute
* |offset| within source text on the line of |lineToken| (which must have
* been computed from |offset|).
*
* A partial column number on a line that isn't the first line is just the
* actual column number. But a partial column number on the first line is the
* column number *ignoring the initial line/column of the script*. For
* example, consider this HTML with line/column number keys:
*
* 1 2 3
* 0123456789012345678901234 567890
* ------------------------------------
* 1 | <html>
* 2 | <head>
* 3 | <script>var x = 3; x &lt; 4;
* 4 | const y = 7;</script>
* 5 | </head>
* 6 | <body></body>
* 7 | </html>
*
* The script would be compiled specifying initial (line, column) of (3, 10)
* using |JS::ReadOnlyCompileOptions::{lineno,column}|. And the column
* reported by |computeColumn| for the "v" of |var| would be 10. But the
* partial column number of the "v" in |var|, that this function returns,
* would be 0. On the other hand, the column reported by |computeColumn| and
* the partial column number returned by this function for the "c" in |const|
* would both be 0, because it's not in the first line of source text.
*
* The partial column is with respect *only* to the JavaScript source text as
* SpiderMonkey sees it. In the example, the "&lt;" is converted to "<" by
* the browser before SpiderMonkey would see it. So the partial column of the
* "4" in the inequality would be 16, not 19.
*
* Code points are not all equal length, so counting requires *some* kind of
* linear-time counting from the start of the line. This function attempts
* various tricks to reduce this cost. If these optimizations succeed,
* repeated calls to this function on a line will pay a one-time cost linear
* in the length of the line, then each call pays a separate constant-time
* cost. If the optimizations do not succeed, this function works in time
* linear in the length of the line.
*
* It's unusual for a function in *this* class to be |Unit|-templated, but
* while this operation manages |Unit|-agnostic fields in this class and in
* |srcCoords|, it must *perform* |Unit|-sensitive computations to fill them.
* And this is the best place to do that.
*/
template <typename Unit>
uint32_t computePartialColumn(const LineToken lineToken,
const uint32_t offset,
const SourceUnits<Unit>& sourceUnits) const;
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
/**
* Update line/column information for the start of a new line at
* |lineStartOffset|.
*/
MOZ_MUST_USE MOZ_ALWAYS_INLINE bool internalUpdateLineInfoForEOL(
uint32_t lineStartOffset);
@ -686,6 +750,22 @@ class TokenStreamAnyChars : public TokenStreamShared {
/** Return the stored filename pointer, |filename_|. */
const char* getFilename() const {
  return filename_;
}
private:
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
static constexpr uint32_t ColumnChunkLength = 128;
/**
* Map from line number (of lines at least |ColumnChunkLength| code units
* long) to a sequence of the column numbers at |ColumnChunkLength| boundaries
* rewound (if needed) to the nearest code point boundary.
*
* Entries appear in this map only when a column computation of sufficient
* distance is performed on a line, and the vectors are lazily filled as
* greater offsets within lines require column computations.
*/
mutable HashMap<uint32_t, Vector<uint32_t>> longLineColumnInfo_;
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
protected:
// Options used for parsing/tokenizing.
const JS::ReadOnlyCompileOptions& options_;
@ -730,6 +810,29 @@ class TokenStreamAnyChars : public TokenStreamShared {
JSContext* const cx;
bool mutedErrors;
StrictModeGetter* strictModeGetter; // used to test for strict mode
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
// Computing accurate column numbers requires at *some* point linearly
// iterating through prior source units in the line to properly account for
// multi-unit code points. This is quadratic if counting happens repeatedly.
//
// But usually we need columns for advancing offsets through scripts. By
// caching the last ((line number, offset) => relative column) mapping (in
// similar manner to how |SourceUnits::lastIndex_| is used to cache
// (offset => line number) mappings) we can usually avoid re-iterating through
// the common line prefix.
//
// Additionally, we avoid hash table lookup costs by caching the
// |Vector<uint32_t>*| for the line of the last lookup. (|nullptr| means we
// have to look it up -- or it hasn't been created yet.) This pointer is
// invalidated when a lookup on a new line occurs, but as it's not a pointer
// at literal element data, it's *not* invalidated when new entries are added
// to such a vector.
mutable uint32_t lineOfLastColumnComputation_ = UINT32_MAX;
mutable Vector<uint32_t>* lastChunkVectorForLine_ = nullptr;
mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX;
mutable uint32_t lastComputedColumn_ = 0;
#endif // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
};
/** Identity overload: a UTF-16 code unit is returned unchanged. */
constexpr char16_t CodeUnitValue(char16_t unit) {
  return unit;
}
@ -1677,22 +1780,14 @@ class GeneralTokenStreamChars : public SpecializedTokenStreamCharsBase<Unit> {
return static_cast<TokenStreamSpecific*>(this);
}
private:
// Computing accurate column numbers requires linearly iterating through all
// source units in the line to account for multi-unit code points; on long
// lines requiring many column computations, this becomes quadratic.
//
// However, because usually we need columns for advancing offsets through
// scripts, caching the last ((line number, offset) => relative column)
// mapping -- in similar manner to how |SourceUnits::lastIndex_| is used to
// cache (offset => line number) mappings -- lets us avoid re-iterating
// through the line prefix in most cases.
mutable uint32_t lastLineForColumn_ = UINT32_MAX;
mutable uint32_t lastOffsetForColumn_ = UINT32_MAX;
mutable uint32_t lastColumn_ = 0;
protected:
/**
* Compute the column number in Unicode code points of the absolute |offset|
* within source text on the line corresponding to |lineToken|.
*
* |offset| must be a code point boundary, preceded only by validly-encoded
* source units. (It doesn't have to be *followed* by valid source units.)
*/
uint32_t computeColumn(LineToken lineToken, uint32_t offset) const;
void computeLineAndColumn(uint32_t offset, uint32_t* line,
uint32_t* column) const;

Просмотреть файл

@ -0,0 +1,403 @@
// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
/*
* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/licenses/publicdomain/
*/
//-----------------------------------------------------------------------------
// Test prologue: record the bug number and a one-line summary, then print
// both as a banner before any testing begins.
var BUGNUMBER = 1551916;
var summary =
"Optimize computing a column number as count of code points by caching " +
"column numbers (and whether each chunk might contain anything multi-unit) " +
"and counting forward from them";
print(BUGNUMBER + ": " + summary);
/**************
* BEGIN TEST *
**************/
// Various testing of column-number computations, with respect to counting as
// code points or units, for very long lines.
//
// This test should be valid regardless whether
// |JS_COLUMN_DIMENSION_IS_CODE_POINTS()| is 0 or 1. It also *should* pass no
// matter what valid value |TokenStreamAnyChars::ColumnChunkLength| takes (it
// must be at least 4 so that the maximum-length code point in UTF-8/16 will fit
// in a single chunk), because the value of that constant should be externally
// invisible save for perf effects. (As a result, recompiling and running this
// test with a variety of different values assigned to that constant is a good
// smoke-test of column number computation, that doesn't require having to
// closely inspect any column-related code.)
//
// However, this test is structured on the assumption that that constant has the
// value 128, in order to exercise in targeted fashion various column number
// computation edge cases.
//
// All this testing *could* be written to not be done with |Reflect.parse| --
// backwards column computations happen even when compiling normal code, in some
// cases. But it's much more the exception than the rule. And |Reflect.parse|
// has *very* predictable column-computation operations (basically start/end
// coordinates are computed immediately when the end of an AST node is reached)
// that make it easier to recognize what the exact pattern of computations for
// which offsets will look like.
// Terse location check: assert that |node.loc| starts at |expectedStart| and
// ends at |expectedEnd|, each given as a two-element [line, column] array.
function checkLoc(node, expectedStart, expectedEnd)
{
  const from = node.loc.start;
  assertEq(from.line, expectedStart[0],
           "start line number must be as expected");
  assertEq(from.column, expectedStart[1],
           "start column number must be as expected");

  const to = node.loc.end;
  assertEq(to.line, expectedEnd[0], "end line number must be as expected");
  assertEq(to.column, expectedEnd[1],
           "end column number must be as expected");
}
// Count the code points in |str|.  String iteration yields one element per
// code point (a surrogate pair counts once), unlike |str.length|, which counts
// UTF-16 code units.
function lengthInCodePoints(str)
{
  const codePoints = Array.from(str);
  return codePoints.length;
}
// True if column numbers are counts of code points, false otherwise. This
// constant can be used to short-circuit testing that isn't point/unit-agnostic.
//
// The value is detected at runtime by parsing a one-statement script whose
// string literal holds four non-BMP characters, then classifying each reported
// end column as matching the code-point count ("p"), the code-unit count
// ("u"), or neither ("x").  All three probes must agree.
const columnsAreCodePoints = (function()
{
var columnTypes = [];
// Record which interpretation |actual| is consistent with.
function checkColumn(actual, expectedPoints, expectedUnits)
{
if (actual === expectedPoints)
columnTypes.push("p");
else if (actual === expectedUnits)
columnTypes.push("u");
else
columnTypes.push("x");
}
var script = Reflect.parse('"😱😱😱😱";', { loc: true });
assertEq(script.type, "Program");
assertEq(script.loc.start.line, 1);
assertEq(script.loc.end.line, 1);
assertEq(script.loc.start.column, 0);
// Whole program: quote + 4 characters + quote + semicolon is 7 code points,
// or 11 code units because each character takes two units.
checkColumn(script.loc.end.column, 7, 11);
var body = script.body;
assertEq(body.length, 1);
var stmt = body[0];
assertEq(stmt.type, "ExpressionStatement");
assertEq(stmt.loc.start.line, 1);
assertEq(stmt.loc.end.line, 1);
assertEq(stmt.loc.start.column, 0);
checkColumn(stmt.loc.end.column, 7, 11);
var expr = stmt.expression;
assertEq(expr.type, "Literal");
assertEq(expr.value, "😱😱😱😱");
assertEq(expr.loc.start.line, 1);
assertEq(expr.loc.end.line, 1);
assertEq(expr.loc.start.column, 0);
// The literal alone excludes the semicolon: 6 code points or 10 code units.
checkColumn(expr.loc.end.column, 6, 10);
var checkResult = columnTypes.join(",");
assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
"columns must be wholly code points or units: " + checkResult);
return checkResult === "p,p,p";
})();
// Start with some basic sanity-testing, without regard to exactly when, how, or
// in what order (offset => column) computations are performed.
// Build one long single-line script (370 code points, 450 UTF-16 code units)
// declaring an array of forty one-element arrays, each holding a string with
// two non-BMP characters.  Evaluate it to confirm it is well-formed, then parse
// it with location info and check the reported columns of the program, the
// declaration, the outer array, and every inner array and string literal.
// Does nothing when columns are counted in code units.
function testSimple()
{
if (!columnsAreCodePoints)
return;
// Array elements within the full |simpleCode| string constructed below are
// one-element arrays containing the string "😱😱#x" where "#" is the
// character that, in C++, could be written as |'(' + i| where |i| is the
// index of the array within the outer array.
let simpleCodeArray =
[
'var Q = [[', // column 0, offset 0
// REPEAT
'"😱😱(x"],["', // column 10, offset 10
'😱😱)x"],["😱', // column 20, offset 22
'😱*x"],["😱😱', // column 30, offset 35
'+x"],["😱😱,', // column 40, offset 48
'x"],["😱😱-x', // column 50, offset 60
'"],["😱😱.x"', // column 60, offset 72
'],["😱😱/x"]', // column 70, offset 84
',["😱😱0x"],', // column 80, offset 96
'["😱😱1x"],[', // column 90, offset 108
// REPEAT
'"😱😱2x"],["', // column 100, offset 120 -- chunk limit between "]
'😱😱3x"],["😱', // column 110, offset 132
'😱4x"],["😱😱', // column 120, offset 145
'5x"],["😱😱6', // column 130, offset 158
'x"],["😱😱7x', // column 140, offset 170
'"],["😱😱8x"', // column 150, offset 182
'],["😱😱9x"]', // column 160, offset 194
',["😱😱:x"],', // column 170, offset 206
'["😱😱;x"],[', // column 180, offset 218
// REPEAT
'"😱😱<x"],["', // column 190, offset 230
'😱😱=x"],["😱', // column 200, offset 242
'😱>x"],["😱😱', // column 210, offset 255 -- chunk limit splits first 😱
'?x"],["😱😱@', // column 220, offset 268
'x"],["😱😱Ax', // column 230, offset 280
'"],["😱😱Bx"', // column 240, offset 292
'],["😱😱Cx"]', // column 250, offset 304
',["😱😱Dx"],', // column 260, offset 316
'["😱😱Ex"],[', // column 270, offset 328
// REPEAT
'"😱😱Fx"],["', // column 280, offset 340
'😱😱Gx"],["😱', // column 290, offset 352
'😱Hx"],["😱😱', // column 300, offset 365
'Ix"],["😱😱J', // column 310, offset 378 -- chunk limit between ["
'x"],["😱😱Kx', // column 320, offset 390
'"],["😱😱Lx"', // column 330, offset 402
'],["😱😱Mx"]', // column 340, offset 414
',["😱😱Nx"],', // column 350, offset 426
'["😱😱Ox"]];', // column 360 (10 long), offset 438 (+12 to end)
];
let simpleCode = simpleCodeArray.join("");
// |simpleCode| overall contains this many code points. (This number is
// chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
// long so that long-line handling is exercised, and the relevant vector
// increased in length, for more than one chunk [which would be too short to
// trigger chunking] and for more than two chunks [so that vector extension
// will eventually occur].)
const CodePointLength = 370;
assertEq(lengthInCodePoints(simpleCode), CodePointLength,
"code point count should be correct");
// |simpleCodeArray| contains this many REPEAT-delimited cycles.
const RepetitionNumber = 4;
// Each cycle consists of this many elements.
const ElementsPerCycle = 9;
// Each element in a cycle has at least this many 😱.
const MinFaceScreamingPerElementInCycle = 2;
// Each cycle contains this many elements with three 😱.
const ElementsInCycleWithThreeFaceScreaming = 2;
// Compute the overall number of UTF-16 code units. (UTF-16 because this is a
// JS string as input.)  Every 😱 is one code point but two code units, so
// each one adds a single extra unit on top of the code point count.
const OverallCodeUnitCount =
CodePointLength +
RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
ElementsInCycleWithThreeFaceScreaming);
// Code units != code points.
assertEq(OverallCodeUnitCount > CodePointLength, true,
"string contains code points outside BMP, so length in units " +
"exceeds length in points");
// The overall computed number of code units has this exact numeric value.
assertEq(OverallCodeUnitCount, 450,
"code unit count computation produces this value");
// The overall computed number of code units matches the string length.
assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");
// Evaluate the string.
var Q;
eval(simpleCode);
// Verify characteristics of the resulting execution.
assertEq(Array.isArray(Q), true);
const NumArrayElements = 40;
assertEq(Q.length, NumArrayElements);
Q.forEach((v, i) => {
assertEq(Array.isArray(v), true);
assertEq(v.length, 1);
assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
});
let parseTree = Reflect.parse(simpleCode, { loc: true });
// Check the overall script.
assertEq(parseTree.type, "Program");
checkLoc(parseTree, [1, 0], [1, 370]);
assertEq(parseTree.body.length, 1);
// Check the coordinates of the declaration.
let varDecl = parseTree.body[0];
assertEq(varDecl.type, "VariableDeclaration");
checkLoc(varDecl, [1, 0], [1, 369]);
// ...and its initializing expression.
let varInit = varDecl.declarations[0].init;
assertEq(varInit.type, "ArrayExpression");
checkLoc(varInit, [1, 8], [1, 369]);
// ...and then every literal inside it.
assertEq(varInit.elements.length, NumArrayElements, "array literal length");
const ItemLength = lengthInCodePoints('["😱😱#x"],');
assertEq(ItemLength, 9, "item length check");
for (let i = 0; i < NumArrayElements; i++)
{
let elem = varInit.elements[i];
assertEq(elem.type, "ArrayExpression");
// The first inner array opens at column 9; each later one begins
// |ItemLength| columns further on.  |endCol| is one past the closing
// bracket because |ItemLength| also counts the separating comma.
let startCol = 9 + i * ItemLength;
let endCol = startCol + ItemLength - 1;
checkLoc(elem, [1, startCol], [1, endCol]);
let arrayElems = elem.elements;
assertEq(arrayElems.length, 1);
let str = arrayElems[0];
assertEq(str.type, "Literal");
assertEq(str.value,
"😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
// The string literal sits just inside the enclosing brackets.
checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
}
}
testSimple();
// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
// it should be observable, precisely, but effects of mis-applying or
// miscomputing it would in principle be observable if such were happening.
// This test also is intended to be useful for (manually, in a debugger)
// verifying that the optimization is computed and kicks in correctly.
// Build a script whose first line spans just over three 128-unit chunks: two
// chunks of purely single-unit text, a third that switches to multi-unit code
// points and ends in the middle of one, and a short tail.  The script ends
// with a string literal containing U+2028, so the outermost nodes finish on
// line 2.  Parse it with location info and check every node's coordinates.
// Does nothing when columns are counted in code units.
function testGuaranteedSingleUnit()
{
if (!columnsAreCodePoints)
return;
// Begin a few array literals in a first chunk to test column computation in
// that first chunk.
//
// End some of them in the first chunk to test columns *before* we know we
// have a long line.
//
// End one array *outside* the first chunk to test a computation inside a
// first chunk *after* we know we have a long line and have computed a first
// chunk.
let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0
assertEq(mixedChunksCode.length, 15);
assertEq(lengthInCodePoints(mixedChunksCode), 15);
// Pad with spaces out to exactly one chunk.
mixedChunksCode +=
" ".repeat(128 - mixedChunksCode.length); // column 15, offset 15
assertEq(mixedChunksCode.length, 128);
assertEq(lengthInCodePoints(mixedChunksCode), 128);
// Fill out a second chunk as also single-unit, with an outer array literal
// that begins in this chunk but finishes in the next (to test column
// computation in a prior, guaranteed-single-unit chunk).
mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128
assertEq(mixedChunksCode.length, 256);
assertEq(lengthInCodePoints(mixedChunksCode), 256);
// Add a third chunk with one last empty nested array literal (so that we
// tack on another chunk, and conclude the second chunk is single-unit, before
// closing the enclosing array literal). Then close the enclosing array
// literal. Finally start a new string literal element containing
// multi-unit code points. For good measure, make the chunk *end* in the
// middle of such a code point, so that the relevant chunk limit must be
// retracted one code unit.
mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256
// 7 single-unit characters plus 61 two-unit characters is 129 units, one
// more than a chunk, so offset 384 falls between the halves of the last 😱.
assertEq(mixedChunksCode.length, 384 + 1);
assertEq(lengthInCodePoints(mixedChunksCode), 324);
// Wrap things up. Terminate the string, then terminate the nested array
// literal to trigger a column computation within the first chunk that can
// benefit from knowing the first chunk is all single-unit. Next add a *new*
// element to the outermost array, a string literal that contains a line
// terminator. The terminator invalidates the column computation cache, so
// when the outermost array is closed, location info for it will not hit the
// cache. Finally, tack on the terminating semicolon for good measure.
mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385
assertEq(mixedChunksCode.length, 396);
assertEq(lengthInCodePoints(mixedChunksCode), 335);
let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
// Check the overall script.
assertEq(parseTree.type, "Program");
checkLoc(parseTree, [1, 0], [2, 4]);
assertEq(parseTree.body.length, 1);
// Check the coordinates of the declaration.
let varDecl = parseTree.body[0];
assertEq(varDecl.type, "VariableDeclaration");
checkLoc(varDecl, [1, 0], [2, 3]);
// ...and its initializing expression.
let varInit = varDecl.declarations[0].init;
assertEq(varInit.type, "ArrayExpression");
checkLoc(varInit, [1, 8], [2, 3]);
let outerArrayElements = varInit.elements;
assertEq(outerArrayElements.length, 2);
{
// Next the first element, the array inside the initializing expression.
let nestedArray = varInit.elements[0];
assertEq(nestedArray.type, "ArrayExpression");
checkLoc(nestedArray, [1, 10], [1, 327]);
// Now inside that nested array.
let nestedArrayElements = nestedArray.elements;
assertEq(nestedArrayElements.length, 3);
// First the [] in chunk #0
let emptyArray = nestedArrayElements[0];
assertEq(emptyArray.type, "ArrayExpression");
assertEq(emptyArray.elements.length, 0);
checkLoc(emptyArray, [1, 12], [1, 14]);
// Then the big array of empty arrays starting in chunk #1 and ending just
// barely in chunk #2.
let bigArrayOfEmpties = nestedArrayElements[1];
assertEq(bigArrayOfEmpties.type, "ArrayExpression");
assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
bigArrayOfEmpties.elements.forEach((elem, i) => {
assertEq(elem.type, "ArrayExpression");
assertEq(elem.elements.length, 0);
// The first 42 elements are packed three columns apart starting at
// column 129.
if (i !== 42)
checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
else
checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
});
// Then the string literal of multi-unit code points beginning in chunk #2
// and ending just into chunk #3.
let multiUnitStringLiteral = nestedArrayElements[2];
assertEq(multiUnitStringLiteral.type, "Literal");
assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
}
{
// Finally, the string literal containing a line terminator as element in
// the outermost array.  It starts on line 1 and, because of the embedded
// U+2028, ends at column 1 of line 2.
let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
}
}
testGuaranteedSingleUnit();
// Test epilogue: report success through |reportCompare| when that function
// exists (presumably supplied by the reftest harness -- it is not defined in
// this file), then print a completion marker.
if (typeof reportCompare === "function")
reportCompare(true, true);
print("Testing completed");