Bug 1551916 - Optimize column-number computations for offsets more than |ColumnChunkLength = 128| code units into a line by saving column information at 128-unit increments (rounded down to the nearest code point start) so that at most (length of... r=arai

...longest code point encoding - 1) + ColumnChunkLength - 1 units must be observed when computing a column number. r=arai Depends on D31301 Differential Revision: https://phabricator.services.mozilla.com/D31302 --HG-- extra : moz-landing-system : lando
2019-05-17 03:21:04 +00:00 · 2019-05-17 03:21:04 +00:00 · d18c7116eb
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@ -495,6 +495,9 @@ TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
                                         const ReadOnlyCompileOptions& options,
                                         StrictModeGetter* smg)
    : srcCoords(cx, options.lineno, options.scriptSourceOffset),
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      longLineColumnInfo_(cx),
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
      options_(options),
      tokens(),
      cursor_(0),
@ -689,15 +692,210 @@ inline void SourceUnits<Utf8Unit>::assertNextCodePoint(

 #endif  // DEBUG

-template <typename Unit>
-static size_t ComputeColumn(const Unit* begin, const Unit* end) {
 #if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
-  return unicode::CountCodePoints(begin, end);
-#else
-  return PointerRangeSize(begin, end);
-#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const Utf8Unit** ptr, const Utf8Unit* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  // |limit| is a code point boundary.
+  if (MOZ_UNLIKELY(*ptr == limit)) {
+    return;
+  }
+
+  // Otherwise rewind past trailing units to the start of the code point.
+#  ifdef DEBUG
+  size_t retracted = 0;
+#  endif
+  while (MOZ_UNLIKELY(IsTrailingUnit((*ptr)[0]))) {
+    --*ptr;
+#  ifdef DEBUG
+    retracted++;
+#  endif
+  }
+
+  MOZ_ASSERT(retracted < 4,
+             "the longest UTF-8 code point is four units, so this should never "
+             "retract more than three units");
 }

+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const char16_t** ptr, const char16_t* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  // |limit| is a code point boundary.
+  if (MOZ_UNLIKELY(*ptr == limit)) {
+    return;
+  }
+
+  // Otherwise the pointer must be retracted by one iff it splits a two-unit
+  // code point.
+  if (MOZ_UNLIKELY(unicode::IsTrailSurrogate((*ptr)[0]))) {
+    // Outside test suites testing garbage WTF-16, it's basically guaranteed
+    // here that |(*ptr)[-1] (*ptr)[0]| is a surrogate pair.
+    if (MOZ_LIKELY(unicode::IsLeadSurrogate((*ptr)[-1]))) {
+      --*ptr;
+    }
+  }
+}
+
+template <typename Unit>
+uint32_t TokenStreamAnyChars::computePartialColumn(
+    const LineToken lineToken, const uint32_t offset,
+    const SourceUnits<Unit>& sourceUnits) const {
+  lineToken.assertConsistentOffset(offset);
+
+  const uint32_t line = lineNumber(lineToken);
+  const uint32_t start = srcCoords.lineStart(lineToken);
+
+  // Reset the previous offset/column cache for this line, if the previous
+  // lookup wasn't on this line.
+  if (line != lineOfLastColumnComputation_) {
+    lineOfLastColumnComputation_ = line;
+    lastChunkVectorForLine_ = nullptr;
+    lastOffsetOfComputedColumn_ = start;
+    lastComputedColumn_ = 0;
+  }
+
+  // Compute and return the final column number from a partial offset/column,
+  // using the last-cached offset/column if they're more optimal.
+  auto ColumnFromPartial = [this, offset, &sourceUnits](uint32_t partialOffset,
+                                                        uint32_t partialCols) {
+    MOZ_ASSERT(partialOffset <= offset);
+
+    // If the last lookup on this line was closer to |offset|, use it.
+    if (partialOffset < this->lastOffsetOfComputedColumn_ &&
+        this->lastOffsetOfComputedColumn_ <= offset) {
+      partialOffset = this->lastOffsetOfComputedColumn_;
+      partialCols = this->lastComputedColumn_;
+    }
+
+    const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
+    const Unit* end = sourceUnits.codeUnitPtrAt(offset);
+
+    partialOffset += PointerRangeSize(begin, end);
+    partialCols += AssertedCast<uint32_t>(unicode::CountCodePoints(begin, end));
+
+    this->lastOffsetOfComputedColumn_ = partialOffset;
+    this->lastComputedColumn_ = partialCols;
+    return partialCols;
+  };
+
+  const uint32_t offsetInLine = offset - start;
+
+  // The index within a relevant |Vector<uint32_t>| of the nearest chunk
+  // info...if it's been computed at all.
+  const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
+
+  // Compute the column from the start of the line if chunk information would
+  // direct us to the start of the line -- including if the line's too short to
+  // be chunked.
+  if (chunkIndex == 0) {
+    return ColumnFromPartial(start, 0);
+  }
+
+  // If this line has no chunk vector yet, insert one in the hash map.  (The
+  // required index is allocated and filled further down.)
+  if (!lastChunkVectorForLine_) {
+    auto ptr = longLineColumnInfo_.lookupForAdd(line);
+    if (!ptr) {
+      // This could rehash and invalidate a cached vector pointer, but the outer
+      // condition means we don't have a cached pointer.
+      if (!longLineColumnInfo_.add(ptr, line, Vector<uint32_t>(cx))) {
+        // In case of OOM, just count columns from the start of the line.
+        cx->recoverFromOutOfMemory();
+        return ColumnFromPartial(start, 0);
+      }
+    }
+
+    // Note that adding elements to this vector won't invalidate this pointer.
+    lastChunkVectorForLine_ = &ptr->value();
+  }
+
+  const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
+
+  auto RetractedOffsetOfChunk = [
+#  ifdef DEBUG
+                                    this,
+#  endif
+                                    start, limit,
+                                    &sourceUnits](uint32_t index) {
+    MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
+
+    uint32_t naiveOffset = start + index * ColumnChunkLength;
+    const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
+
+    const Unit* actualPtr = naivePtr;
+    RetractPointerToCodePointBoundary(&actualPtr, limit);
+
+    return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
+  };
+
+  uint32_t partialOffset;
+  uint32_t partialColumn;
+
+  auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
+  if (entriesLen <= chunkIndex) {
+    // Extend the vector from its last entry or the start of the line.  (This is
+    // also a suitable partial start point if we must recover from OOM.)
+    if (entriesLen > 0) {
+      partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
+      partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1];
+    } else {
+      partialOffset = start;
+      partialColumn = 0;
+    }
+
+    if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
+      // As earlier, just start from the greatest offset/column in case of OOM.
+      cx->recoverFromOutOfMemory();
+      return ColumnFromPartial(partialOffset, partialColumn);
+    }
+
+    // OOM is no longer possible now.  \o/
+
+    // The vector always begins with the column of the line start, i.e. zero.
+    if (entriesLen == 0) {
+      lastChunkVectorForLine_->infallibleAppend(0);
+      entriesLen++;
+    }
+
+    do {
+      const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
+      const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
+          start + std::min(entriesLen * ColumnChunkLength, offsetInLine));
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      static_assert(ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength,
+                    "chunk length in code units must be able to contain the "
+                    "largest encoding of a code point, for retracting below to "
+                    "never underflow");
+
+      // Prior tokenizing ensured that [begin, limit) is validly encoded, and
+      // |begin < chunkLimit|, so any retraction here can't underflow.
+      RetractPointerToCodePointBoundary(&chunkLimit, limit);
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      partialOffset += PointerRangeSize(begin, chunkLimit);
+      partialColumn += unicode::CountCodePoints(begin, chunkLimit);
+
+      lastChunkVectorForLine_->infallibleAppend(partialColumn);
+      entriesLen++;
+    } while (entriesLen < chunkIndex + 1);
+  } else {
+    partialOffset = RetractedOffsetOfChunk(chunkIndex);
+    partialColumn = (*lastChunkVectorForLine_)[chunkIndex];
+  }
+
+  return ColumnFromPartial(partialOffset, partialColumn);
+}
+
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
 template <typename Unit, class AnyCharsAccess>
 uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
    LineToken lineToken, uint32_t offset) const {
@ -705,26 +903,14 @@ uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(

  const TokenStreamAnyChars& anyChars = anyCharsAccess();

-  uint32_t lineNumber = anyChars.srcCoords.lineNumber(lineToken);
+  uint32_t partialCols =
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      anyChars.computePartialColumn(lineToken, offset, this->sourceUnits)
+#else
+      offset - anyChars.lineStart(lineToken)
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      ;

-  uint32_t beginOffset;
-  uint32_t partialCols;
-  if (lineNumber == lastLineForColumn_ && lastOffsetForColumn_ <= offset) {
-    beginOffset = lastOffsetForColumn_;
-    partialCols = lastColumn_;
-  } else {
-    beginOffset = anyChars.lineStart(lineToken);
-    partialCols = 0;
-  }
-
-  const Unit* begin = this->sourceUnits.codeUnitPtrAt(beginOffset);
-  const Unit* end = this->sourceUnits.codeUnitPtrAt(offset);
-
-  partialCols += AssertedCast<uint32_t>(ComputeColumn(begin, end));
-
-  lastLineForColumn_ = lineNumber;
-  lastOffsetForColumn_ = offset;
-  lastColumn_ = partialCols;
  return (lineToken.isFirstLine() ? anyChars.options_.column : 0) + partialCols;
 }

--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@ -207,6 +207,7 @@
 #include "frontend/Token.h"
 #include "frontend/TokenKind.h"
 #include "js/CompileOptions.h"
+#include "js/HashTable.h"    // js::HashMap
 #include "js/RegExpFlags.h"  // JS::RegExpFlags
 #include "js/UniquePtr.h"
 #include "js/Vector.h"
@ -318,6 +319,9 @@ class MOZ_STACK_CLASS TokenStreamPosition final {
  Token lookaheadTokens[TokenStreamShared::maxLookahead];
 } JS_HAZ_ROOTED;

+template <typename Unit>
+class SourceUnits;
+
 // Column numbers *ought* be in terms of counts of code points, but in the past
 // we counted code units.  Set this to 0 to keep returning counts of code units
 // (even for UTF-8, which is clearly wrong, but we don't ship UTF-8 yet so this
@ -601,6 +605,8 @@ class TokenStreamAnyChars : public TokenStreamShared {

    /** Return the offset of the start of the line for |lineToken|. */
    uint32_t lineStart(LineToken lineToken) const {
+      MOZ_ASSERT(lineToken.index + 1 < lineStartOffsets_.length(),
+                 "recorded line-start information must be available");
      return lineStartOffsets_[lineToken.index];
    }
  };
@ -639,6 +645,64 @@ class TokenStreamAnyChars : public TokenStreamShared {
  MOZ_ALWAYS_INLINE void updateFlagsForEOL() { flags.isDirtyLine = false; }

 private:
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  /**
+   * Compute the "partial" column number in Unicode code points of the absolute
+   * |offset| within source text on the line of |lineToken| (which must have
+   * been computed from |offset|).
+   *
+   * A partial column number on a line that isn't the first line is just the
+   * actual column number.  But a partial column number on the first line is the
+   * column number *ignoring the initial line/column of the script*.  For
+   * example, consider this HTML with line/column number keys:
+   *
+   *                 1         2            3
+   *       0123456789012345678901234   567890
+   *     ------------------------------------
+   *   1 | <html>
+   *   2 | <head>
+   *   3 |   <script>var x = 3;  x &lt; 4;
+   *   4 | const y = 7;</script>
+   *   5 | </head>
+   *   6 | <body></body>
+   *   7 | </html>
+   *
+   * The script would be compiled specifying initial (line, column) of (3, 10)
+   * using |JS::ReadOnlyCompileOptions::{lineno,column}|.  And the column
+   * reported by |computeColumn| for the "v" of |var| would be 10.  But the
+   * partial column number of the "v" in |var|, that this function returns,
+   * would be 0.  On the other hand, the column reported by |computeColumn| and
+   * the partial column number returned by this function for the "c" in |const|
+   * would both be 0, because it's not in the first line of source text.
+   *
+   * The partial column is with respect *only* to the JavaScript source text as
+   * SpiderMonkey sees it.  In the example, the "&lt;" is converted to "<" by
+   * the browser before SpiderMonkey would see it.  So the partial column of the
+   * "4" in the inequality would be 16, not 19.
+   *
+   * Code points are not all equal length, so counting requires *some* kind of
+   * linear-time counting from the start of the line.  This function attempts
+   * various tricks to reduce this cost.  If these optimizations succeed,
+   * repeated calls to this function on a line will pay a one-time cost linear
+   * in the length of the line, then each call pays a separate constant-time
+   * cost.  If the optimizations do not succeed, this function works in time
+   * linear in the length of the line.
+   *
+   * It's unusual for a function in *this* class to be |Unit|-templated, but
+   * while this operation manages |Unit|-agnostic fields in this class and in
+   * |srcCoords|, it must *perform* |Unit|-sensitive computations to fill them.
+   * And this is the best place to do that.
+   */
+  template <typename Unit>
+  uint32_t computePartialColumn(const LineToken lineToken,
+                                const uint32_t offset,
+                                const SourceUnits<Unit>& sourceUnits) const;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
+  /**
+   * Update line/column information for the start of a new line at
+   * |lineStartOffset|.
+   */
  MOZ_MUST_USE MOZ_ALWAYS_INLINE bool internalUpdateLineInfoForEOL(
      uint32_t lineStartOffset);

@ -686,6 +750,22 @@ class TokenStreamAnyChars : public TokenStreamShared {

  const char* getFilename() const { return filename_; }

+ private:
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  static constexpr uint32_t ColumnChunkLength = 128;
+
+  /**
+   * Line number (of lines at least |ColumnChunkLength| code units long) to
+   * a sequence of the column numbers at |ColumnChunkLength| boundaries rewound
+   * (if needed) to the nearest code point boundary.
+   *
+   * Entries appear in this map only when a column computation of sufficient
+   * distance is performed on a line, and the vectors are lazily filled as
+   * greater offsets within lines require column computations.
+   */
+  mutable HashMap<uint32_t, Vector<uint32_t>> longLineColumnInfo_;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
 protected:
  // Options used for parsing/tokenizing.
  const JS::ReadOnlyCompileOptions& options_;
@ -730,6 +810,29 @@ class TokenStreamAnyChars : public TokenStreamShared {
  JSContext* const cx;
  bool mutedErrors;
  StrictModeGetter* strictModeGetter;  // used to test for strict mode
+
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  // Computing accurate column numbers requires at *some* point linearly
+  // iterating through prior source units in the line to properly account for
+  // multi-unit code points.  This is quadratic if counting happens repeatedly.
+  //
+  // But usually we need columns for advancing offsets through scripts.  By
+  // caching the last ((line number, offset) => relative column) mapping (in
+  // similar manner to how |SourceUnits::lastIndex_| is used to cache
+  // (offset => line number) mappings) we can usually avoid re-iterating through
+  // the common line prefix.
+  //
+  // Additionally, we avoid hash table lookup costs by caching the
+  // |Vector<uint32_t>*| for the line of the last lookup.  (|nullptr| means we
+  // have to look it up -- or it hasn't been created yet.)  This pointer is
+  // invalidated when a lookup on a new line occurs, but as it's not a pointer
+  // at literal element data, it's *not* invalidated when new entries are added
+  // to such a vector.
+  mutable uint32_t lineOfLastColumnComputation_ = UINT32_MAX;
+  mutable Vector<uint32_t>* lastChunkVectorForLine_ = nullptr;
+  mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX;
+  mutable uint32_t lastComputedColumn_ = 0;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
 };

 constexpr char16_t CodeUnitValue(char16_t unit) { return unit; }
@ -1677,22 +1780,14 @@ class GeneralTokenStreamChars : public SpecializedTokenStreamCharsBase<Unit> {
    return static_cast<TokenStreamSpecific*>(this);
  }

- private:
-  // Computing accurate column numbers requires linearly iterating through all
-  // source units in the line to account for multi-unit code points; on long
-  // lines requiring many column computations, this becomes quadratic.
-  //
-  // However, because usually we need columns for advancing offsets through
-  // scripts, caching the last ((line number, offset) => relative column)
-  // mapping -- in similar manner to how |SourceUnits::lastIndex_| is used to
-  // cache (offset => line number) mappings -- lets us avoid re-iterating
-  // through the line prefix in most cases.
-
-  mutable uint32_t lastLineForColumn_ = UINT32_MAX;
-  mutable uint32_t lastOffsetForColumn_ = UINT32_MAX;
-  mutable uint32_t lastColumn_ = 0;
-
 protected:
+  /**
+   * Compute the column number in Unicode code points of the absolute |offset|
+   * within source text on the line corresponding to |lineToken|.
+   *
+   * |offset| must be a code point boundary, preceded only by validly-encoded
+   * source units.  (It doesn't have to be *followed* by valid source units.)
+   */
  uint32_t computeColumn(LineToken lineToken, uint32_t offset) const;
  void computeLineAndColumn(uint32_t offset, uint32_t* line,
                            uint32_t* column) const;
--- a/js/src/tests/non262/syntax/column-numbers-in-long-lines.js
+++ b/js/src/tests/non262/syntax/column-numbers-in-long-lines.js
@ -0,0 +1,403 @@
+// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+
+//-----------------------------------------------------------------------------
+var BUGNUMBER = 1551916;
+var summary =
+  "Optimize computing a column number as count of code points by caching " +
+  "column numbers (and whether each chunk might contain anything multi-unit) " +
+  "and counting forward from them";
+
+print(BUGNUMBER + ": " + summary);
+
+/**************
+ * BEGIN TEST *
+ **************/
+
+// Various testing of column-number computations, with respect to counting as
+// code points or units, for very long lines.
+//
+// This test should be valid regardless whether
+// |JS_COLUMN_DIMENSION_IS_CODE_POINTS()| is 0 or 1.  It also *should* pass no
+// matter what valid value |TokenStreamAnyChars::ColumnChunkLength| takes (it
+// must be at least 4 so that the maximum-length code point in UTF-8/16 will fit
+// in a single chunk), because the value of that constant should be externally
+// invisible save for perf effects. (As a result, recompiling and running this
+// test with a variety of different values assigned to that constant is a good
+// smoke-test of column number computation, that doesn't require having to
+// closely inspect any column-related code.)
+//
+// However, this test is structured on the assumption that that constant has the
+// value 128, in order to exercise in targeted fashion various column number
+// computation edge cases.
+//
+// All this testing *could* be written to not be done with |Reflect.parse| --
+// backwards column computations happen even when compiling normal code, in some
+// cases.  But it's much more the exception than the rule.  And |Reflect.parse|
+// has *very* predictable column-computation operations (basically start/end
+// coordinates are computed immediately when the end of an AST node is reached)
+// that make it easier to recognize what the exact pattern of computations for
+// which offsets will look like.
+
+// Helper function for checking node location tersely.
+function checkLoc(node, expectedStart, expectedEnd)
+{
+  let start = node.loc.start;
+
+  assertEq(start.line, expectedStart[0],
+           "start line number must be as expected");
+  assertEq(start.column, expectedStart[1],
+           "start column number must be as expected");
+
+  let end = node.loc.end;
+
+  assertEq(end.line, expectedEnd[0], "end line number must be as expected");
+  assertEq(end.column, expectedEnd[1],
+           "end column number must be as expected");
+}
+
+function lengthInCodePoints(str)
+{
+  return [...str].length;
+}
+
+// True if column numbers are counts of code points, false otherwise.  This
+// constant can be used to short-circuit testing that isn't point/unit-agnostic.
+const columnsAreCodePoints = (function()
+{
+  var columnTypes = [];
+
+  function checkColumn(actual, expectedPoints, expectedUnits)
+  {
+    if (actual === expectedPoints)
+      columnTypes.push("p");
+    else if (actual === expectedUnits)
+      columnTypes.push("u");
+    else
+      columnTypes.push("x");
+  }
+
+  var script = Reflect.parse('"😱😱😱😱";', { loc: true });
+  assertEq(script.type, "Program");
+  assertEq(script.loc.start.line, 1);
+  assertEq(script.loc.end.line, 1);
+  assertEq(script.loc.start.column, 0);
+  checkColumn(script.loc.end.column, 7, 11);
+
+  var body = script.body;
+  assertEq(body.length, 1);
+
+  var stmt = body[0];
+  assertEq(stmt.type, "ExpressionStatement");
+  assertEq(stmt.loc.start.line, 1);
+  assertEq(stmt.loc.end.line, 1);
+  assertEq(stmt.loc.start.column, 0);
+  checkColumn(stmt.loc.end.column, 7, 11);
+
+  var expr = stmt.expression;
+  assertEq(expr.type, "Literal");
+  assertEq(expr.value, "😱😱😱😱");
+  assertEq(expr.loc.start.line, 1);
+  assertEq(expr.loc.end.line, 1);
+  assertEq(expr.loc.start.column, 0);
+  checkColumn(expr.loc.end.column, 6, 10);
+
+  var checkResult = columnTypes.join(",");
+
+  assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
+           "columns must be wholly code points or units: " + checkResult);
+
+  return checkResult === "p,p,p";
+})();
+
+// Start with some basic sanity-testing, without regard to exactly when, how, or
+// in what order (offset => column) computations are performed.
+function testSimple()
+{
+  if (!columnsAreCodePoints)
+    return;
+
+  // Array elements within the full |simpleCode| string constructed below are
+  // one-element arrays containing the string "😱😱#x" where "#" is the
+  // character that, in C++, could be written as |'(' + i| where |i| is the
+  // index of the array within the outer array.
+  let simpleCodeArray =
+    [
+      'var Q = [[',  // column 0, offset 0
+      // REPEAT
+      '"😱😱(x"],["',  // column 10, offset 10
+      '😱😱)x"],["😱',  // column 20, offset 22
+      '😱*x"],["😱😱',  // column 30, offset 35
+      '+x"],["😱😱,',  // column 40, offset 48
+      'x"],["😱😱-x',  // column 50, offset 60
+      '"],["😱😱.x"',  // column 60, offset 72
+      '],["😱😱/x"]',  // column 70, offset 84
+      ',["😱😱0x"],',  // column 80, offset 96
+      '["😱😱1x"],[',  // column 90, offset 108
+      // REPEAT
+      '"😱😱2x"],["',  // column 100, offset 120 -- chunk limit between "]
+      '😱😱3x"],["😱',  // column 110, offset 132
+      '😱4x"],["😱😱',  // column 120, offset 145
+      '5x"],["😱😱6',  // column 130, offset 158
+      'x"],["😱😱7x',  // column 140, offset 170
+      '"],["😱😱8x"',  // column 150, offset 182
+      '],["😱😱9x"]',  // column 160, offset 194
+      ',["😱😱:x"],',  // column 170, offset 206
+      '["😱😱;x"],[',  // column 180, offset 218
+      // REPEAT
+      '"😱😱<x"],["',  // column 190, offset 230
+      '😱😱=x"],["😱',  // column 200, offset 242
+      '😱>x"],["😱😱',  // column 210, offset 255 -- chunk limit splits first 😱
+      '?x"],["😱😱@',  // column 220, offset 268
+      'x"],["😱😱Ax',  // column 230, offset 280
+      '"],["😱😱Bx"',  // column 240, offset 292
+      '],["😱😱Cx"]',  // column 250, offset 304
+      ',["😱😱Dx"],',  // column 260, offset 316
+      '["😱😱Ex"],[',  // column 270, offset 328
+      // REPEAT
+      '"😱😱Fx"],["',  // column 280, offset 340
+      '😱😱Gx"],["😱',  // column 290, offset 352
+      '😱Hx"],["😱😱',  // column 300, offset 365
+      'Ix"],["😱😱J',  // column 310, offset 378 -- chunk limit between ["
+      'x"],["😱😱Kx',  // column 320, offset 390
+      '"],["😱😱Lx"',  // column 330, offset 402
+      '],["😱😱Mx"]',  // column 340, offset 414
+      ',["😱😱Nx"],',  // column 350, offset 426
+      '["😱😱Ox"]];',  // column 360 (10 long), offset 438 (+12 to end)
+    ];
+  let simpleCode = simpleCodeArray.join("");
+
+  // |simpleCode| overall contains this many code points.  (This number is
+  // chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
+  // long so that long-line handling is exercised, and the relevant vector
+  // increased in length, for more than one chunk [which would be too short to
+  // trigger chunking] and for more than two chunks [so that vector extension
+  // will eventually occur].)
+  const CodePointLength = 370;
+
+  assertEq(lengthInCodePoints(simpleCode), CodePointLength,
+           "code point count should be correct");
+
+  // |simpleCodeArray| contains this many REPEAT-delimited cycles.
+  const RepetitionNumber = 4;
+
+  // Each cycle consists of this many elements.
+  const ElementsPerCycle = 9;
+
+  // Each element in a cycle has at least this many 😱.
+  const MinFaceScreamingPerElementInCycle = 2;
+
+  // Each cycle consists of many elements with three 😱.
+  const ElementsInCycleWithThreeFaceScreaming = 2;
+
+  // Compute the overall number of UTF-16 code units.  (UTF-16 because this is a
+  // JS string as input.)
+  const OverallCodeUnitCount =
+    CodePointLength +
+    RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
+                        ElementsInCycleWithThreeFaceScreaming);
+
+  // Code units != code points.
+  assertEq(OverallCodeUnitCount > CodePointLength, true,
+           "string contains code points outside BMP, so length in units " +
+           "exceeds length in points");
+
+  // The overall computed number of code units has this exact numeric value.
+  assertEq(OverallCodeUnitCount, 450,
+           "code unit count computation produces this value");
+
+  // The overall computed number of code units matches the string length.
+  assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");
+
+  // Evaluate the string.
+  var Q;
+  eval(simpleCode);
+
+  // Verify characteristics of the resulting execution.
+  assertEq(Array.isArray(Q), true);
+
+  const NumArrayElements = 40;
+  assertEq(Q.length, NumArrayElements);
+  Q.forEach((v, i) => {
+    assertEq(Array.isArray(v), true);
+    assertEq(v.length, 1);
+    assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
+  });
+
+  let parseTree = Reflect.parse(simpleCode, { loc: true });
+
+  // Check the overall script.
+  assertEq(parseTree.type, "Program");
+  checkLoc(parseTree, [1, 0], [1, 370]);
+  assertEq(parseTree.body.length, 1);
+
+  // Check the coordinates of the declaration.
+  let varDecl = parseTree.body[0];
+  assertEq(varDecl.type, "VariableDeclaration");
+  checkLoc(varDecl, [1, 0], [1, 369]);
+
+  // ...and its initializing expression.
+  let varInit = varDecl.declarations[0].init;
+  assertEq(varInit.type, "ArrayExpression");
+  checkLoc(varInit, [1, 8], [1, 369]);
+
+  // ...and then every literal inside it.
+  assertEq(varInit.elements.length, NumArrayElements, "array literal length");
+
+  const ItemLength = lengthInCodePoints('["😱😱#x"],');
+  assertEq(ItemLength, 9, "item length check");
+
+  for (let i = 0; i < NumArrayElements; i++)
+  {
+    let elem = varInit.elements[i];
+    assertEq(elem.type, "ArrayExpression");
+
+    let startCol = 9 + i * ItemLength;
+    let endCol = startCol + ItemLength - 1;
+    checkLoc(elem, [1, startCol], [1, endCol]);
+
+    let arrayElems = elem.elements;
+    assertEq(arrayElems.length, 1);
+
+    let str = arrayElems[0];
+    assertEq(str.type, "Literal");
+    assertEq(str.value,
+             "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
+    checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
+  }
+}
+testSimple();
+
+// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
+// it should be observable, precisely, but effects of mis-applying or
+// miscomputing it would in principle be observable if such were happening.
+// This test also is intended to to be useful for (manually, in a debugger)
+// verifying that the optimization is computed and kicks in correctly.
+function testGuaranteedSingleUnit()
+{
+  if (!columnsAreCodePoints)
+    return;
+
+  // Begin a few array literals in a first chunk to test column computation in
+  // that first chunk.
+  //
+  // End some of them in the first chunk to test columns *before* we know we
+  // have a long line.
+  //
+  // End one array *outside* the first chunk to test a computation inside a
+  // first chunk *after* we know we have a long line and have computed a first
+  // chunk.
+  let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0
+  assertEq(mixedChunksCode.length, 15);
+  assertEq(lengthInCodePoints(mixedChunksCode), 15);
+
+  mixedChunksCode +=
+    " ".repeat(128 - mixedChunksCode.length); // column 15, offset 15
+  assertEq(mixedChunksCode.length, 128);
+  assertEq(lengthInCodePoints(mixedChunksCode), 128);
+
+  // Fill out a second chunk as also single-unit, with an outer array literal
+  // that begins in this chunk but finishes in the next (to test column
+  // computation in a prior, guaranteed-single-unit chunk).
+  mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128
+  assertEq(mixedChunksCode.length, 256);
+  assertEq(lengthInCodePoints(mixedChunksCode), 256);
+
+  // Add a third chunk with one last empty nested array literal (so that we
+  // tack on another chunk, and conclude the second chunk is single-unit, before
+  // closing the enclosing array literal).  Then close the enclosing array
+  // literal.  Finally start a new string literal element containing
+  // multi-unit code points.  For good measure, make the chunk *end* in the
+  // middle of such a code point, so that the relevant chunk limit must be
+  // retracted one code unit.
+  mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256
+  assertEq(mixedChunksCode.length, 384 + 1);
+  assertEq(lengthInCodePoints(mixedChunksCode), 324);
+
+  // Wrap things up.  Terminate the string, then terminate the nested array
+  // literal to trigger a column computation within the first chunk that can
+  // benefit from knowing the first chunk is all single-unit.  Next add a *new*
+  // element to the outermost array, a string literal that contains a line
+  // terminator.  The terminator invalidates the column computation cache, so
+  // when the outermost array is closed, location info for it will not hit the
+  // cache.  Finally, tack on the terminating semicolon for good measure.
+  mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385
+  assertEq(mixedChunksCode.length, 396);
+  assertEq(lengthInCodePoints(mixedChunksCode), 335);
+
+  let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
+
+  // Check the overall script.
+  assertEq(parseTree.type, "Program");
+  checkLoc(parseTree, [1, 0], [2, 4]);
+  assertEq(parseTree.body.length, 1);
+
+  // Check the coordinates of the declaration.
+  let varDecl = parseTree.body[0];
+  assertEq(varDecl.type, "VariableDeclaration");
+  checkLoc(varDecl, [1, 0], [2, 3]);
+
+  // ...and its initializing expression.
+  let varInit = varDecl.declarations[0].init;
+  assertEq(varInit.type, "ArrayExpression");
+  checkLoc(varInit, [1, 8], [2, 3]);
+
+  let outerArrayElements = varInit.elements;
+  assertEq(outerArrayElements.length, 2);
+
+  {
+    // Next the first element, the array inside the initializing expression.
+    let nestedArray = varInit.elements[0];
+    assertEq(nestedArray.type, "ArrayExpression");
+    checkLoc(nestedArray, [1, 10], [1, 327]);
+
+    // Now inside that nested array.
+    let nestedArrayElements = nestedArray.elements;
+    assertEq(nestedArrayElements.length, 3);
+
+    // First the [] in chunk #0
+    let emptyArray = nestedArrayElements[0];
+    assertEq(emptyArray.type, "ArrayExpression");
+    assertEq(emptyArray.elements.length, 0);
+    checkLoc(emptyArray, [1, 12], [1, 14]);
+
+    // Then the big array of empty arrays starting in chunk #1 and ending just
+    // barely in chunk #2.
+    let bigArrayOfEmpties = nestedArrayElements[1];
+    assertEq(bigArrayOfEmpties.type, "ArrayExpression");
+    assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
+    bigArrayOfEmpties.elements.forEach((elem, i) => {
+      assertEq(elem.type, "ArrayExpression");
+      assertEq(elem.elements.length, 0);
+      if (i !== 42)
+        checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
+      else
+        checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
+    });
+
+    // Then the string literal of multi-unit code points beginning in chunk #2
+    // and ending just into chunk #3 on a second line.
+    let multiUnitStringLiteral = nestedArrayElements[2];
+    assertEq(multiUnitStringLiteral.type, "Literal");
+    assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
+    checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
+  }
+
+  {
+    // Finally, the string literal containing a line terminator as element in
+    // the outermost array.
+    let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
+    assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
+    assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
+    checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
+  }
+}
+testGuaranteedSingleUnit();
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
+
+print("Testing completed");