From 518f53e993740c75668bf1c3667e16e905b55397 Mon Sep 17 00:00:00 2001 From: Tooru Fujisawa Date: Thu, 9 Nov 2023 11:41:07 +0000 Subject: [PATCH] Bug 1862692 - Part 8: Use dedicate type for TokenStream-internal unsigned column number offset. r=iain Differential Revision: https://phabricator.services.mozilla.com/D193021 --- js/public/ColumnNumber.h | 47 ++++++++++++ js/src/frontend/TokenStream.cpp | 132 +++++++++++++++++--------------- js/src/frontend/TokenStream.h | 58 +++++++------- 3 files changed, 146 insertions(+), 91 deletions(-) diff --git a/js/public/ColumnNumber.h b/js/public/ColumnNumber.h index 036229a44149..087a1136d510 100644 --- a/js/public/ColumnNumber.h +++ b/js/public/ColumnNumber.h @@ -94,6 +94,46 @@ struct ColumnNumberOffset { int32_t value() const { return value_; } }; +// The positive offset from certain column number. +struct ColumnNumberUnsignedOffset { + private: + uint32_t value_ = 0; + + public: + constexpr ColumnNumberUnsignedOffset() = default; + constexpr ColumnNumberUnsignedOffset( + const ColumnNumberUnsignedOffset& other) = default; + + inline explicit ColumnNumberUnsignedOffset(uint32_t value) : value_(value) {} + + static constexpr ColumnNumberUnsignedOffset zero() { + return ColumnNumberUnsignedOffset(); + } + + ColumnNumberUnsignedOffset operator+( + const ColumnNumberUnsignedOffset& offset) const { + return ColumnNumberUnsignedOffset(value_ + offset.value()); + } + + ColumnNumberUnsignedOffset& operator+=( + const ColumnNumberUnsignedOffset& offset) { + value_ += offset.value(); + return *this; + } + + bool operator==(const ColumnNumberUnsignedOffset& rhs) const { + return value_ == rhs.value_; + } + + bool operator!=(const ColumnNumberUnsignedOffset& rhs) const { + return !(*this == rhs); + } + + uint32_t value() const { return value_; } + + uint32_t* addressOfValueForTranscode() { return &value_; } +}; + namespace detail { template @@ -144,6 +184,13 @@ struct ColumnNumberWithOrigin { return ColumnNumberWithOrigin(value_ + 
offset.value()); } + ColumnNumberWithOrigin operator+( + const ColumnNumberUnsignedOffset& offset) const { + MOZ_ASSERT(valid()); + MOZ_ASSERT(ptrdiff_t(value_) + offset.value() >= 0); + return ColumnNumberWithOrigin(value_ + offset.value()); + } + ColumnNumberWithOrigin operator-( const ColumnNumberOffset& offset) const { MOZ_ASSERT(valid()); diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index d3e1a9e2f26a..a0fd3bd9938d 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -592,7 +592,7 @@ static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary( } template -JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumn( +JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffset( const LineToken lineToken, const uint32_t offset, const SourceUnits& sourceUnits) const { lineToken.assertConsistentOffset(offset); @@ -601,64 +601,67 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumn( const uint32_t offsetInLine = offset - start; if constexpr (std::is_same_v) { - // Column number is in UTF-16 code units. - return JS::ColumnNumberZeroOrigin(offsetInLine); + // Column offset is in UTF-16 code units. + return JS::ColumnNumberUnsignedOffset(offsetInLine); } - return computePartialColumnForUTF8(lineToken, offset, start, offsetInLine, - sourceUnits); + return computeColumnOffsetForUTF8(lineToken, offset, start, offsetInLine, + sourceUnits); } template -JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( +JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffsetForUTF8( const LineToken lineToken, const uint32_t offset, const uint32_t start, const uint32_t offsetInLine, const SourceUnits& sourceUnits) const { const uint32_t line = lineNumber(lineToken); - // Reset the previous offset/column cache for this line, if the previous - // lookup wasn't on this line. 
+ // Reset the previous offset/column number offset cache for this line, if the + // previous lookup wasn't on this line. if (line != lineOfLastColumnComputation_) { lineOfLastColumnComputation_ = line; lastChunkVectorForLine_ = nullptr; lastOffsetOfComputedColumn_ = start; - lastComputedColumn_ = JS::ColumnNumberZeroOrigin::zero(); + lastComputedColumnOffset_ = JS::ColumnNumberUnsignedOffset::zero(); } - // Compute and return the final column number from a partial offset/column, - // using the last-cached offset/column if they're more optimal. - auto ColumnFromPartial = [this, offset, &sourceUnits]( - uint32_t partialOffset, - JS::ColumnNumberZeroOrigin partialCols, - UnitsType unitsType) { - MOZ_ASSERT(partialOffset <= offset); + // Compute and return the final column number offset from a partially + // calculated offset/column number offset, using the last-cached + // offset/column number offset if they're more optimal. + auto OffsetFromPartial = + [this, offset, &sourceUnits]( + uint32_t partialOffset, + JS::ColumnNumberUnsignedOffset partialColumnOffset, + UnitsType unitsType) { + MOZ_ASSERT(partialOffset <= offset); - // If the last lookup on this line was closer to |offset|, use it. - if (partialOffset < this->lastOffsetOfComputedColumn_ && - this->lastOffsetOfComputedColumn_ <= offset) { - partialOffset = this->lastOffsetOfComputedColumn_; - partialCols = this->lastComputedColumn_; - } + // If the last lookup on this line was closer to |offset|, use it. 
+ if (partialOffset < this->lastOffsetOfComputedColumn_ && + this->lastOffsetOfComputedColumn_ <= offset) { + partialOffset = this->lastOffsetOfComputedColumn_; + partialColumnOffset = this->lastComputedColumnOffset_; + } - const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset); - const Unit* end = sourceUnits.codeUnitPtrAt(offset); + const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset); + const Unit* end = sourceUnits.codeUnitPtrAt(offset); - size_t offsetDelta = AssertedCast(PointerRangeSize(begin, end)); - partialOffset += offsetDelta; + size_t offsetDelta = + AssertedCast(PointerRangeSize(begin, end)); + partialOffset += offsetDelta; - if (unitsType == UnitsType::GuaranteedSingleUnit) { - MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta, - "guaranteed-single-units also guarantee pointer distance " - "equals UTF-16 code unit count"); - partialCols += JS::ColumnNumberOffset(offsetDelta); - } else { - partialCols += JS::ColumnNumberOffset( - AssertedCast(unicode::CountUTF16CodeUnits(begin, end))); - } + if (unitsType == UnitsType::GuaranteedSingleUnit) { + MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta, + "guaranteed-single-units also guarantee pointer distance " + "equals UTF-16 code unit count"); + partialColumnOffset += JS::ColumnNumberUnsignedOffset(offsetDelta); + } else { + partialColumnOffset += JS::ColumnNumberUnsignedOffset( + AssertedCast(unicode::CountUTF16CodeUnits(begin, end))); + } - this->lastOffsetOfComputedColumn_ = partialOffset; - this->lastComputedColumn_ = partialCols; - return partialCols; - }; + this->lastOffsetOfComputedColumn_ = partialOffset; + this->lastComputedColumnOffset_ = partialColumnOffset; + return partialColumnOffset; + }; // We won't add an entry to |longLineColumnInfo_| for lines where the maximum // column has offset less than this value. 
The most common (non-minified) @@ -677,14 +680,14 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( // not *always* worst-case.) UnitsType unitsType; if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) { - MOZ_ASSERT((*lastChunkVectorForLine_)[0].column() == - JS::ColumnNumberZeroOrigin::zero()); + MOZ_ASSERT((*lastChunkVectorForLine_)[0].columnOffset() == + JS::ColumnNumberUnsignedOffset::zero()); unitsType = (*lastChunkVectorForLine_)[0].unitsType(); } else { unitsType = UnitsType::PossiblyMultiUnit; } - return ColumnFromPartial(start, JS::ColumnNumberZeroOrigin::zero(), + return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(), unitsType); } @@ -698,7 +701,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( if (!longLineColumnInfo_.add(ptr, line, Vector(fc))) { // In case of OOM, just count columns from the start of the line. fc->recoverFromOutOfMemory(); - return ColumnFromPartial(start, JS::ColumnNumberZeroOrigin::zero(), + return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(), UnitsType::PossiblyMultiUnit); } } @@ -734,7 +737,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( }; uint32_t partialOffset; - JS::ColumnNumberZeroOrigin partialColumn; + JS::ColumnNumberUnsignedOffset partialColumnOffset; UnitsType unitsType; auto entriesLen = AssertedCast(lastChunkVectorForLine_->length()); @@ -742,7 +745,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( // We've computed the chunk |offset| resides in. Compute the column number // from the chunk. partialOffset = RetractedOffsetOfChunk(chunkIndex); - partialColumn = (*lastChunkVectorForLine_)[chunkIndex].column(); + partialColumnOffset = (*lastChunkVectorForLine_)[chunkIndex].columnOffset(); // This is exact if |chunkIndex| isn't the last chunk. 
unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType(); @@ -759,16 +762,17 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( // also a suitable partial start point if we must recover from OOM.) if (entriesLen > 0) { partialOffset = RetractedOffsetOfChunk(entriesLen - 1); - partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1].column(); + partialColumnOffset = + (*lastChunkVectorForLine_)[entriesLen - 1].columnOffset(); } else { partialOffset = start; - partialColumn = JS::ColumnNumberZeroOrigin::zero(); + partialColumnOffset = JS::ColumnNumberUnsignedOffset::zero(); } if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) { // As earlier, just start from the greatest offset/column in case of OOM. fc->recoverFromOutOfMemory(); - return ColumnFromPartial(partialOffset, partialColumn, + return OffsetFromPartial(partialOffset, partialColumnOffset, UnitsType::PossiblyMultiUnit); } @@ -777,8 +781,9 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( // The vector always begins with the column of the line start, i.e. zero, // with chunk units pessimally assumed not single-unit. 
if (entriesLen == 0) { - lastChunkVectorForLine_->infallibleAppend(ChunkInfo( - JS::ColumnNumberZeroOrigin::zero(), UnitsType::PossiblyMultiUnit)); + lastChunkVectorForLine_->infallibleAppend( + ChunkInfo(JS::ColumnNumberUnsignedOffset::zero(), + UnitsType::PossiblyMultiUnit)); entriesLen++; } @@ -813,10 +818,10 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( } partialOffset += numUnits; - partialColumn += JS::ColumnNumberOffset(numUTF16CodeUnits); + partialColumnOffset += JS::ColumnNumberUnsignedOffset(numUTF16CodeUnits); lastChunkVectorForLine_->infallibleEmplaceBack( - partialColumn, UnitsType::PossiblyMultiUnit); + partialColumnOffset, UnitsType::PossiblyMultiUnit); } while (entriesLen < chunkIndex + 1); // We're at a spot in the current final chunk, and final chunks never have @@ -824,7 +829,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8( unitsType = UnitsType::PossiblyMultiUnit; } - return ColumnFromPartial(partialOffset, partialColumn, unitsType); + return OffsetFromPartial(partialOffset, partialColumnOffset, unitsType); } template @@ -835,19 +840,20 @@ GeneralTokenStreamChars::computeColumn( const TokenStreamAnyChars& anyChars = anyCharsAccess(); - JS::ColumnNumberZeroOrigin column = - anyChars.computePartialColumn(lineToken, offset, this->sourceUnits); + JS::ColumnNumberUnsignedOffset columnOffset = + anyChars.computeColumnOffset(lineToken, offset, this->sourceUnits); - if (lineToken.isFirstLine()) { - if (column.zeroOriginValue() > JS::LimitedColumnNumberZeroOrigin::Limit) { - return JS::LimitedColumnNumberZeroOrigin::limit(); - } - - uint32_t firstLineOffset = anyChars.options_.column.zeroOriginValue(); - column += JS::ColumnNumberOffset(firstLineOffset); + if (!lineToken.isFirstLine()) { + return JS::LimitedColumnNumberZeroOrigin::fromUnlimited( + JS::ColumnNumberZeroOrigin::zero() + columnOffset); } - return JS::LimitedColumnNumberZeroOrigin::fromUnlimited(column); + if 
(columnOffset.value() > JS::LimitedColumnNumberZeroOrigin::Limit) { + return JS::LimitedColumnNumberZeroOrigin::limit(); + } + + return JS::LimitedColumnNumberZeroOrigin::fromUnlimited( + anyChars.options_.column + columnOffset); } template diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 9ce61455c103..10c4c15a9bd7 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -205,7 +205,7 @@ #include "frontend/Token.h" #include "frontend/TokenKind.h" #include "js/CharacterEncoding.h" // JS::ConstUTF8CharsZ -#include "js/ColumnNumber.h" // JS::LimitedColumnNumberZeroOrigin, JS::ColumnNumberZeroOrigin, JS::ColumnNumberOneOrigin +#include "js/ColumnNumber.h" // JS::LimitedColumnNumberZeroOrigin, JS::ColumnNumberOneOrigin, JS::ColumnNumberUnsignedOffset #include "js/CompileOptions.h" #include "js/friend/ErrorMessages.h" // JSMSG_* #include "js/HashTable.h" // js::HashMap @@ -506,21 +506,22 @@ enum class UnitsType : unsigned char { class ChunkInfo { private: - // Column number in UTF-16 code units (0-origin). + // Column number offset in UTF-16 code units. // Store everything in |unsigned char|s so everything packs. 
- unsigned char column_[sizeof(uint32_t)]; + unsigned char columnOffset_[sizeof(uint32_t)]; unsigned char unitsType_; public: - ChunkInfo(JS::ColumnNumberZeroOrigin col, UnitsType type) + ChunkInfo(JS::ColumnNumberUnsignedOffset offset, UnitsType type) : unitsType_(static_cast(type)) { - memcpy(column_, col.addressOfValueForTranscode(), sizeof(col)); + memcpy(columnOffset_, offset.addressOfValueForTranscode(), sizeof(offset)); } - JS::ColumnNumberZeroOrigin column() const { - JS::ColumnNumberZeroOrigin col; - memcpy(col.addressOfValueForTranscode(), column_, sizeof(uint32_t)); - return col; + JS::ColumnNumberUnsignedOffset columnOffset() const { + JS::ColumnNumberUnsignedOffset offset; + memcpy(offset.addressOfValueForTranscode(), columnOffset_, + sizeof(uint32_t)); + return offset; } UnitsType unitsType() const { @@ -576,7 +577,7 @@ class TokenStreamAnyChars : public TokenStreamShared { /** * A map of (line number => sequence of the column numbers at * |ColumnChunkLength|-unit boundaries rewound [if needed] to the nearest code - * point boundary). (|TokenStreamAnyChars::computePartialColumn| is the sole + * point boundary). (|TokenStreamAnyChars::computeColumnOffset| is the sole * user of |ColumnChunkLength| and therefore contains its definition.) * * Entries appear in this map only when a column computation of sufficient @@ -622,10 +623,10 @@ class TokenStreamAnyChars : public TokenStreamShared { mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX; /** - * The column number for the offset (in code units) of the last column - * computation performed, relative to source start. + * The column number offset from the 1st column for the offset (in code units) + * of the last column computation performed, relative to source start. */ - mutable JS::ColumnNumberZeroOrigin lastComputedColumn_; + mutable JS::ColumnNumberUnsignedOffset lastComputedColumnOffset_; // Intra-token fields. 
@@ -903,14 +904,15 @@ class TokenStreamAnyChars : public TokenStreamShared { private: /** - * Compute the "partial" column number in UTF-16 code units of the absolute - * |offset| within source text on the line of |lineToken| (which must have - * been computed from |offset|). + * Compute the column number offset from the 1st code unit in the line in + * UTF-16 code units, for given absolute |offset| within source text on the + * line of |lineToken| (which must have been computed from |offset|). * - * A partial column number on a line that isn't the first line is just the - * actual column number. But a partial column number on the first line is the - * column number *ignoring the initial line/column of the script*. For - * example, consider this HTML with line/column number keys: + * A column number offset on a line that isn't the first line is just + * the actual column number in 0-origin. But a column number offset + * on the first line is the column number offset from the initial + * line/column of the script. For example, consider this HTML with + * line/column number keys: * * 1 2 3 * 0123456789012345678901234 567890 @@ -926,15 +928,15 @@ class TokenStreamAnyChars : public TokenStreamShared { * The script would be compiled specifying initial (line, column) of (3, 10) * using |JS::ReadOnlyCompileOptions::{lineno,column}|. And the column * reported by |computeColumn| for the "v" of |var| would be 10. But the - * partial column number of the "v" in |var|, that this function returns, + * column number offset of the "v" in |var|, that this function returns, * would be 0. On the other hand, the column reported by |computeColumn| and - * the partial column number returned by this function for the "c" in |const| + * the column number offset returned by this function for the "c" in |const| * would both be 0, because it's not in the first line of source text. * - * The partial column is with respect *only* to the JavaScript source text as - * SpiderMonkey sees it. 
In the example, the "&lt;" is converted to "<" by - * the browser before SpiderMonkey would see it. So the partial column of the - * "4" in the inequality would be 16, not 19. + * The column number offset is with respect *only* to the JavaScript source + * text as SpiderMonkey sees it. In the example, the "&lt;" is converted to + * "<" by the browser before SpiderMonkey would see it. So the column number + * offset of the "4" in the inequality would be 16, not 19. * * UTF-16 code units are not all equal length in UTF-8 source, so counting * requires *some* kind of linear-time counting from the start of the line. @@ -950,12 +952,12 @@ class TokenStreamAnyChars : public TokenStreamShared { * And this is the best place to do that. */ template - JS::ColumnNumberZeroOrigin computePartialColumn( + JS::ColumnNumberUnsignedOffset computeColumnOffset( const LineToken lineToken, const uint32_t offset, const SourceUnits& sourceUnits) const; template - JS::ColumnNumberZeroOrigin computePartialColumnForUTF8( + JS::ColumnNumberUnsignedOffset computeColumnOffsetForUTF8( const LineToken lineToken, const uint32_t offset, const uint32_t start, const uint32_t offsetInLine, const SourceUnits& sourceUnits) const;