Bug 1366927 - Rework column handling in the frontend by separating column numbers from offsets, which are now measured from the root ScriptSource buffer. (r=jimb)

The idea is this:

Offsets, like token positions and scripts' begin/end and
toString{Start,End}, are now always offset from the beginning of the
script's full ScriptSource buffer. That is, when delazifying, if the
buffer we are parsing is an interior pointer into some outer script's
ScriptSource buffer, the token offsets for that parsing session are
still offset from that outer buffer instead of the interior pointer.

The initial column number in CompileOptions will be added to the column
number when an offset maps to the initial line of the ScriptSource.
Columns are computed from offsets, but have no bearing on how to index
into the ScriptSource buffers.
This commit is contained in:
Shu-yu Guo 2017-07-17 18:45:52 -07:00
Родитель fd488a1db1
Коммит 81c208d0c8
12 изменённых файлов: 71 добавлений и 85 удалений

Просмотреть файл

@ -645,11 +645,11 @@ frontend::CompileLazyFunction(JSContext* cx, Handle<LazyScript*> lazy, const cha
{
MOZ_ASSERT(cx->compartment() == lazy->functionNonDelazifying()->compartment());
uint32_t sourceStartColumn = lazy->scriptSource()->startColumn();
CompileOptions options(cx, lazy->version());
options.setMutedErrors(lazy->mutedErrors())
.setFileAndLine(lazy->filename(), lazy->lineno())
.setColumn(lazy->column(), sourceStartColumn)
.setColumn(lazy->column())
.setScriptSourceOffset(lazy->begin())
.setNoScriptRval(false)
.setSelfHostingMode(false);
@ -682,8 +682,9 @@ frontend::CompileLazyFunction(JSContext* cx, Handle<LazyScript*> lazy, const cha
Rooted<JSFunction*> fun(cx, lazy->functionNonDelazifying());
MOZ_ASSERT(!lazy->isLegacyGenerator());
ParseNode* pn = parser.standaloneLazyFunction(fun, lazy->toStringStart() + sourceStartColumn,
lazy->strict(), lazy->generatorKind(), lazy->asyncKind());
ParseNode* pn = parser.standaloneLazyFunction(fun, lazy->toStringStart(),
lazy->strict(), lazy->generatorKind(),
lazy->asyncKind());
if (!pn)
return false;

Просмотреть файл

@ -10780,14 +10780,8 @@ BytecodeEmitter::emitClass(ParseNode* pn)
// offsets in the source buffer as source notes so that when we
// actually make the constructor during execution, we can give it the
// correct toString output.
//
// Token positions are already offset from the start column. Since
// toString offsets are absolute offsets into the ScriptSource,
// de-offset from the starting column.
ptrdiff_t classStart = ptrdiff_t(pn->pn_pos.begin) -
tokenStream().options().sourceStartColumn;
ptrdiff_t classEnd = ptrdiff_t(pn->pn_pos.end) -
tokenStream().options().sourceStartColumn;
ptrdiff_t classStart = ptrdiff_t(pn->pn_pos.begin);
ptrdiff_t classEnd = ptrdiff_t(pn->pn_pos.end);
if (!newSrcNote3(SRC_CLASS_SPAN, classStart, classEnd))
return false;

Просмотреть файл

@ -3054,6 +3054,8 @@ Parser<ParseHandler, CharT>::functionArguments(YieldHandling yieldHandling,
argModifier = firstTokenModifier;
}
}
TokenPos firstTokenPos;
if (!parenFreeArrow) {
TokenKind tt;
if (!tokenStream.getToken(&tt, firstTokenModifier))
@ -3063,12 +3065,19 @@ Parser<ParseHandler, CharT>::functionArguments(YieldHandling yieldHandling,
return false;
}
firstTokenPos = pos();
// Record the start of function source (for FunctionToString). If we
// are parenFreeArrow, we will set this below, after consuming the NAME.
funbox->setStart(tokenStream);
} else {
// When delazifying, we may not have a current token and pos() is
// garbage. In that case, substitute the first token's position.
if (!tokenStream.peekTokenPos(&firstTokenPos, firstTokenModifier))
return false;
}
Node argsbody = handler.newList(PNK_PARAMSBODY, pos());
Node argsbody = handler.newList(PNK_PARAMSBODY, firstTokenPos);
if (!argsbody)
return false;
handler.setFunctionFormalParametersAndBody(funcpn, argsbody);
@ -3295,13 +3304,7 @@ Parser<FullParseHandler, char16_t>::skipLazyInnerFunction(ParseNode* pn, uint32_
PropagateTransitiveParseFlags(lazy, pc->sc());
// The position passed to tokenStream.advance() is an offset of the sort
// returned by userbuf.offset() and expected by userbuf.rawCharPtrAt(),
// while LazyScript::{begin,end} offsets are relative to the outermost
// script source.
Rooted<LazyScript*> lazyOuter(context, handler.lazyOuterFunction());
uint32_t userbufBase = lazyOuter->begin() - lazyOuter->column();
if (!tokenStream.advance(fun->lazyScript()->end() - userbufBase))
if (!tokenStream.advance(fun->lazyScript()->end()))
return false;
#if JS_HAS_EXPR_CLOSURES

Просмотреть файл

@ -561,17 +561,8 @@ class FunctionBox : public ObjectBox, public SharedContext
}
void setStart(const TokenStream& tokenStream) {
// Token positions are already offset from the start column in
// CompileOptions. bufStart and toStringStart, however, refer to
// absolute positions within the ScriptSource buffer, and need to
// de-offset from the starting column.
uint32_t offset = tokenStream.currentToken().pos.begin;
uint32_t sourceStartColumn = tokenStream.options().sourceStartColumn;
MOZ_ASSERT(offset >= sourceStartColumn);
MOZ_ASSERT(toStringStart >= sourceStartColumn);
toStringStart -= sourceStartColumn;
bufStart = offset - sourceStartColumn;
bufStart = offset;
tokenStream.srcCoords.lineNumAndColumnIndex(offset, &startLine, &startColumn);
}
@ -579,14 +570,9 @@ class FunctionBox : public ObjectBox, public SharedContext
// For all functions except class constructors, the buffer and
// toString ending positions are the same. Class constructors override
// the toString ending position with the end of the class definition.
//
// Offsets are de-offset for the same reason as in setStart above.
uint32_t offset = tokenStream.currentToken().pos.end;
uint32_t sourceStartColumn = tokenStream.options().sourceStartColumn;
MOZ_ASSERT(offset >= sourceStartColumn);
bufEnd = offset - sourceStartColumn;
toStringEnd = bufEnd;
bufEnd = offset;
toStringEnd = offset;
}
void trace(JSTracer* trc) override;

Просмотреть файл

@ -248,8 +248,9 @@ TokenStreamAnyChars::reservedWordToPropertyName(TokenKind tt) const
return nullptr;
}
TokenStream::SourceCoords::SourceCoords(JSContext* cx, uint32_t ln)
: lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
TokenStream::SourceCoords::SourceCoords(JSContext* cx, uint32_t ln, uint32_t col,
uint32_t initialLineOffset)
: lineStartOffsets_(cx), initialLineNum_(ln), initialColumn_(col), lastLineIndex_(0)
{
// This is actually necessary! Removing it causes compile errors on
// GCC and clang. You could try declaring this:
@ -260,12 +261,12 @@ TokenStream::SourceCoords::SourceCoords(JSContext* cx, uint32_t ln)
//
uint32_t maxPtr = MAX_PTR;
// The first line begins at buffer offset 0. MAX_PTR is the sentinel. The
// appends cannot fail because |lineStartOffsets_| has statically-allocated
// elements.
// The first line begins at buffer offset |initialLineOffset|. MAX_PTR is
// the sentinel. The appends cannot fail because |lineStartOffsets_| has
// statically-allocated elements.
MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
lineStartOffsets_.infallibleAppend(0);
lineStartOffsets_.infallibleAppend(initialLineOffset);
lineStartOffsets_.infallibleAppend(maxPtr);
}
@ -275,7 +276,8 @@ TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
uint32_t lineIndex = lineNumToIndex(lineNum);
uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset &&
lineStartOffsets_[sentinelIndex] == MAX_PTR);
if (lineIndex == sentinelIndex) {
// We haven't seen this newline before. Update lineStartOffsets_
@ -303,6 +305,7 @@ TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
MOZ_ALWAYS_INLINE bool
TokenStreamAnyChars::SourceCoords::fill(const TokenStreamAnyChars::SourceCoords& other)
{
MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
@ -380,21 +383,16 @@ TokenStreamAnyChars::SourceCoords::lineNum(uint32_t offset) const
uint32_t
TokenStreamAnyChars::SourceCoords::columnIndex(uint32_t offset) const
{
uint32_t lineIndex = lineIndexOf(offset);
uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
MOZ_ASSERT(offset >= lineStartOffset);
return offset - lineStartOffset;
return lineIndexAndOffsetToColumn(lineIndexOf(offset), offset);
}
void
TokenStreamAnyChars::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
uint32_t* columnIndex) const
uint32_t* column) const
{
uint32_t lineIndex = lineIndexOf(offset);
*lineNum = lineIndexToNum(lineIndex);
uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
MOZ_ASSERT(offset >= lineStartOffset);
*columnIndex = offset - lineStartOffset;
*column = lineIndexAndOffsetToColumn(lineIndex, offset);
}
#ifdef _MSC_VER
@ -404,7 +402,7 @@ TokenStreamAnyChars::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32
TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx, const ReadOnlyCompileOptions& options,
StrictModeGetter* smg)
: srcCoords(cx, options.lineno),
: srcCoords(cx, options.lineno, options.column, options.scriptSourceOffset),
options_(options),
tokens(),
cursor(),
@ -425,7 +423,7 @@ TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx, const ReadOnlyCompileOpt
TokenStream::TokenStream(JSContext* cx, const ReadOnlyCompileOptions& options,
const CharT* base, size_t length, StrictModeGetter* smg)
: TokenStreamAnyChars(cx, options, smg),
userbuf(cx, base, length, options.column),
userbuf(cx, base, length, options.scriptSourceOffset),
tokenbuf(cx)
{
// Nb: the following tables could be static, but initializing them here is

Просмотреть файл

@ -466,7 +466,8 @@ class TokenStreamAnyChars
// For a given buffer holding source code, |lineStartOffsets_| has one
// element per line of source code, plus one sentinel element. Each
// non-sentinel element holds the buffer offset for the start of the
// corresponding line of source code. For this example script:
// corresponding line of source code. For this example script,
// assuming an initialLineOffset of 0:
//
// 1 // xyz [line starts at offset 0]
// 2 var x; [line starts at offset 7]
@ -484,8 +485,8 @@ class TokenStreamAnyChars
// which is 14. (Note that |initialLineNum_| is often 1, but not
// always.)
//
// The first element is always 0, and the last element is always the
// MAX_PTR sentinel.
// The first element is always initialLineOffset, passed to the
// constructor, and the last element is always the MAX_PTR sentinel.
//
// offset-to-line/column lookups are O(log n) in the worst case (binary
// search), but in practice they're heavily clustered and we do better
@ -497,6 +498,7 @@ class TokenStreamAnyChars
//
Vector<uint32_t, 128> lineStartOffsets_;
uint32_t initialLineNum_;
uint32_t initialColumn_;
// This is mutable because it's modified on every search, but that fact
// isn't visible outside this class.
@ -508,9 +510,17 @@ class TokenStreamAnyChars
uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }
uint32_t lineIndexAndOffsetToColumn(uint32_t lineIndex, uint32_t offset) const {
uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
MOZ_RELEASE_ASSERT(offset >= lineStartOffset);
uint32_t column = offset - lineStartOffset;
if (lineIndex == 0)
return column + initialColumn_;
return column;
}
public:
SourceCoords(JSContext* cx, uint32_t ln);
SourceCoords(JSContext* cx, uint32_t ln, uint32_t col, uint32_t initialLineOffset);
MOZ_MUST_USE bool add(uint32_t lineNum, uint32_t lineStartOffset);
MOZ_MUST_USE bool fill(const SourceCoords& other);
@ -526,7 +536,7 @@ class TokenStreamAnyChars
uint32_t lineNum(uint32_t offset) const;
uint32_t columnIndex(uint32_t offset) const;
void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const;
void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* column) const;
};
SourceCoords srcCoords;

Просмотреть файл

@ -0,0 +1 @@
evaluate("\n(y => 1)()", { columnNumber: 1729 })

Просмотреть файл

@ -3942,7 +3942,7 @@ JS::ReadOnlyCompileOptions::copyPODOptions(const ReadOnlyCompileOptions& rhs)
copyPODTransitiveOptions(rhs);
lineno = rhs.lineno;
column = rhs.column;
sourceStartColumn = rhs.sourceStartColumn;
scriptSourceOffset = rhs.scriptSourceOffset;
isRunOnce = rhs.isRunOnce;
noScriptRval = rhs.noScriptRval;
}

Просмотреть файл

@ -3978,7 +3978,7 @@ class JS_FRIEND_API(ReadOnlyCompileOptions) : public TransitiveCompileOptions
: TransitiveCompileOptions(),
lineno(1),
column(0),
sourceStartColumn(0),
scriptSourceOffset(0),
isRunOnce(false),
noScriptRval(false)
{ }
@ -4001,7 +4001,18 @@ class JS_FRIEND_API(ReadOnlyCompileOptions) : public TransitiveCompileOptions
// POD options.
unsigned lineno;
unsigned column;
unsigned sourceStartColumn;
// The offset within the ScriptSource's full uncompressed text of the first
// character we're presenting for compilation with this CompileOptions.
//
// When we compile a LazyScript, we pass the compiler only the substring of
// the source the lazy function occupies. With chunked decompression, we
// may not even have the complete uncompressed source present in memory. But
// parse node positions are offsets within the ScriptSource's full text,
// and LazyScripts indicate their substring of the full source by its
// starting and ending offsets within the full text. This
// scriptSourceOffset field lets the frontend convert between these
// offsets and offsets within the substring presented for compilation.
unsigned scriptSourceOffset;
// isRunOnce only applies to non-function scripts.
bool isRunOnce;
bool noScriptRval;
@ -4074,12 +4085,8 @@ class JS_FRIEND_API(OwningCompileOptions) : public ReadOnlyCompileOptions
return *this;
}
OwningCompileOptions& setUTF8(bool u) { utf8 = u; return *this; }
OwningCompileOptions& setColumn(unsigned c, unsigned ssc) {
MOZ_ASSERT(ssc <= c);
column = c;
sourceStartColumn = ssc;
return *this;
}
OwningCompileOptions& setColumn(unsigned c) { column = c; return *this; }
OwningCompileOptions& setScriptSourceOffset(unsigned o) { scriptSourceOffset = o; return *this; }
OwningCompileOptions& setIsRunOnce(bool once) { isRunOnce = once; return *this; }
OwningCompileOptions& setNoScriptRval(bool nsr) { noScriptRval = nsr; return *this; }
OwningCompileOptions& setSelfHostingMode(bool shm) { selfHostingMode = shm; return *this; }
@ -4175,12 +4182,8 @@ class MOZ_STACK_CLASS JS_FRIEND_API(CompileOptions) final : public ReadOnlyCompi
return *this;
}
CompileOptions& setUTF8(bool u) { utf8 = u; return *this; }
CompileOptions& setColumn(unsigned c, unsigned ssc) {
MOZ_ASSERT(ssc <= c);
column = c;
sourceStartColumn = ssc;
return *this;
}
CompileOptions& setColumn(unsigned c) { column = c; return *this; }
CompileOptions& setScriptSourceOffset(unsigned o) { scriptSourceOffset = o; return *this; }
CompileOptions& setIsRunOnce(bool once) { isRunOnce = once; return *this; }
CompileOptions& setNoScriptRval(bool nsr) { noScriptRval = nsr; return *this; }
CompileOptions& setSelfHostingMode(bool shm) { selfHostingMode = shm; return *this; }

Просмотреть файл

@ -2326,7 +2326,6 @@ ScriptSource::initFromOptions(JSContext* cx, const ReadOnlyCompileOptions& optio
introductionType_ = options.introductionType;
setIntroductionOffset(options.introductionOffset);
startColumn_ = options.sourceStartColumn;
parameterListEnd_ = parameterListEnd.isSome() ? parameterListEnd.value() : 0;
if (options.hasIntroductionInfo) {

Просмотреть файл

@ -434,12 +434,6 @@ class ScriptSource
UniqueTwoByteChars sourceMapURL_;
bool mutedErrors_;
// The start column of the source. Offsets kept for toString and the
// function source in LazyScripts are absolute positions within a
// ScriptSource buffer. To get their positions, they need to be offset
// with the starting column.
uint32_t startColumn_;
// bytecode offset in caller script that generated this code.
// This is present for eval-ed code, as well as "new Function(...)"-introduced
// scripts.
@ -516,7 +510,6 @@ class ScriptSource
displayURL_(nullptr),
sourceMapURL_(nullptr),
mutedErrors_(false),
startColumn_(0),
introductionOffset_(0),
parameterListEnd_(0),
introducerFilename_(nullptr),
@ -631,8 +624,6 @@ class ScriptSource
bool mutedErrors() const { return mutedErrors_; }
uint32_t startColumn() const { return startColumn_; }
bool hasIntroductionOffset() const { return hasIntroductionOffset_; }
uint32_t introductionOffset() const {
MOZ_ASSERT(hasIntroductionOffset());

Просмотреть файл

@ -1305,7 +1305,7 @@ ParseCompileOptions(JSContext* cx, CompileOptions& options, HandleObject opts,
int32_t c;
if (!ToInt32(cx, v, &c))
return false;
options.setColumn(c, c);
options.setColumn(c);
}
if (!JS_GetProperty(cx, opts, "sourceIsLazy", &v))