Bug 1862692 - Part 8: Use a dedicated type for the TokenStream-internal unsigned column number offset. r=iain

Differential Revision: https://phabricator.services.mozilla.com/D193021
This commit is contained in:
Tooru Fujisawa 2023-11-09 11:41:07 +00:00
Родитель 9f0d2627bd
Коммит 518f53e993
3 изменённых файлов: 146 добавлений и 91 удалений

Просмотреть файл

@ -94,6 +94,46 @@ struct ColumnNumberOffset {
int32_t value() const { return value_; }
};
// A non-negative offset to be added to some column number.
//
// The offset is stored as a uint32_t and all arithmetic is plain unsigned
// arithmetic, so sums wrap modulo 2^32 instead of overflowing.
//
// Every operation is constexpr so that offsets can be built and combined in
// constant expressions.
struct ColumnNumberUnsignedOffset {
 private:
  uint32_t value_ = 0;

 public:
  // Constructs the zero offset.
  constexpr ColumnNumberUnsignedOffset() = default;
  constexpr ColumnNumberUnsignedOffset(
      const ColumnNumberUnsignedOffset& other) = default;
  // Declared alongside the user-declared copy constructor so that copy
  // assignment does not rely on the deprecated implicit generation.
  ColumnNumberUnsignedOffset& operator=(
      const ColumnNumberUnsignedOffset& other) = default;

  // Explicit, so that a raw integer is never silently treated as an offset.
  constexpr explicit ColumnNumberUnsignedOffset(uint32_t value)
      : value_(value) {}

  // Returns the offset of zero columns.
  static constexpr ColumnNumberUnsignedOffset zero() {
    return ColumnNumberUnsignedOffset();
  }

  // Returns the sum of the two offsets (wrapping modulo 2^32).
  constexpr ColumnNumberUnsignedOffset operator+(
      const ColumnNumberUnsignedOffset& offset) const {
    return ColumnNumberUnsignedOffset(value_ + offset.value());
  }

  // Adds |offset| to this offset in place (wrapping modulo 2^32).
  constexpr ColumnNumberUnsignedOffset& operator+=(
      const ColumnNumberUnsignedOffset& offset) {
    value_ += offset.value();
    return *this;
  }

  constexpr bool operator==(const ColumnNumberUnsignedOffset& rhs) const {
    return value_ == rhs.value_;
  }

  constexpr bool operator!=(const ColumnNumberUnsignedOffset& rhs) const {
    return !(*this == rhs);
  }

  // Returns the raw offset.
  constexpr uint32_t value() const { return value_; }

  // Exposes the underlying storage so that callers can read or write the raw
  // 32-bit value directly (e.g. with memcpy).
  uint32_t* addressOfValueForTranscode() { return &value_; }
};
namespace detail {
template <typename T>
@ -144,6 +184,13 @@ struct ColumnNumberWithOrigin {
return ColumnNumberWithOrigin<Origin, LimitValue>(value_ + offset.value());
}
// Returns the column number that lies |offset| columns after this one.
//
// |offset.value()| is unsigned, so adding it can never move the column
// backwards; the only way the addition can go wrong is by wrapping around.
// The second assertion therefore checks for wrap-around rather than for a
// negative result.
// NOTE(review): the wrap check assumes |value_| is an unsigned 32-bit
// integer -- confirm against the member declaration.
ColumnNumberWithOrigin<Origin, LimitValue> operator+(
    const ColumnNumberUnsignedOffset& offset) const {
  MOZ_ASSERT(valid());
  const auto sum = value_ + offset.value();
  MOZ_ASSERT(sum >= value_,
             "column number + unsigned offset must not wrap around");
  return ColumnNumberWithOrigin<Origin, LimitValue>(sum);
}
ColumnNumberWithOrigin<Origin, LimitValue> operator-(
const ColumnNumberOffset& offset) const {
MOZ_ASSERT(valid());

Просмотреть файл

@ -592,7 +592,7 @@ static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
}
template <typename Unit>
JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumn(
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffset(
const LineToken lineToken, const uint32_t offset,
const SourceUnits<Unit>& sourceUnits) const {
lineToken.assertConsistentOffset(offset);
@ -601,64 +601,67 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumn(
const uint32_t offsetInLine = offset - start;
if constexpr (std::is_same_v<Unit, char16_t>) {
// Column number is in UTF-16 code units.
return JS::ColumnNumberZeroOrigin(offsetInLine);
// Column offset is in UTF-16 code units.
return JS::ColumnNumberUnsignedOffset(offsetInLine);
}
return computePartialColumnForUTF8(lineToken, offset, start, offsetInLine,
sourceUnits);
return computeColumnOffsetForUTF8(lineToken, offset, start, offsetInLine,
sourceUnits);
}
template <typename Unit>
JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffsetForUTF8(
const LineToken lineToken, const uint32_t offset, const uint32_t start,
const uint32_t offsetInLine, const SourceUnits<Unit>& sourceUnits) const {
const uint32_t line = lineNumber(lineToken);
// Reset the previous offset/column cache for this line, if the previous
// lookup wasn't on this line.
// Reset the previous offset/column number offset cache for this line, if the
// previous lookup wasn't on this line.
if (line != lineOfLastColumnComputation_) {
lineOfLastColumnComputation_ = line;
lastChunkVectorForLine_ = nullptr;
lastOffsetOfComputedColumn_ = start;
lastComputedColumn_ = JS::ColumnNumberZeroOrigin::zero();
lastComputedColumnOffset_ = JS::ColumnNumberUnsignedOffset::zero();
}
// Compute and return the final column number from a partial offset/column,
// using the last-cached offset/column if they're more optimal.
auto ColumnFromPartial = [this, offset, &sourceUnits](
uint32_t partialOffset,
JS::ColumnNumberZeroOrigin partialCols,
UnitsType unitsType) {
MOZ_ASSERT(partialOffset <= offset);
// Compute and return the final column number offset from a partially
// calculated offset/column number offset, using the last-cached
// offset/column number offset if they're more optimal.
auto OffsetFromPartial =
[this, offset, &sourceUnits](
uint32_t partialOffset,
JS::ColumnNumberUnsignedOffset partialColumnOffset,
UnitsType unitsType) {
MOZ_ASSERT(partialOffset <= offset);
// If the last lookup on this line was closer to |offset|, use it.
if (partialOffset < this->lastOffsetOfComputedColumn_ &&
this->lastOffsetOfComputedColumn_ <= offset) {
partialOffset = this->lastOffsetOfComputedColumn_;
partialCols = this->lastComputedColumn_;
}
// If the last lookup on this line was closer to |offset|, use it.
if (partialOffset < this->lastOffsetOfComputedColumn_ &&
this->lastOffsetOfComputedColumn_ <= offset) {
partialOffset = this->lastOffsetOfComputedColumn_;
partialColumnOffset = this->lastComputedColumnOffset_;
}
const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
const Unit* end = sourceUnits.codeUnitPtrAt(offset);
const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
const Unit* end = sourceUnits.codeUnitPtrAt(offset);
size_t offsetDelta = AssertedCast<uint32_t>(PointerRangeSize(begin, end));
partialOffset += offsetDelta;
size_t offsetDelta =
AssertedCast<uint32_t>(PointerRangeSize(begin, end));
partialOffset += offsetDelta;
if (unitsType == UnitsType::GuaranteedSingleUnit) {
MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta,
"guaranteed-single-units also guarantee pointer distance "
"equals UTF-16 code unit count");
partialCols += JS::ColumnNumberOffset(offsetDelta);
} else {
partialCols += JS::ColumnNumberOffset(
AssertedCast<uint32_t>(unicode::CountUTF16CodeUnits(begin, end)));
}
if (unitsType == UnitsType::GuaranteedSingleUnit) {
MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta,
"guaranteed-single-units also guarantee pointer distance "
"equals UTF-16 code unit count");
partialColumnOffset += JS::ColumnNumberUnsignedOffset(offsetDelta);
} else {
partialColumnOffset += JS::ColumnNumberUnsignedOffset(
AssertedCast<uint32_t>(unicode::CountUTF16CodeUnits(begin, end)));
}
this->lastOffsetOfComputedColumn_ = partialOffset;
this->lastComputedColumn_ = partialCols;
return partialCols;
};
this->lastOffsetOfComputedColumn_ = partialOffset;
this->lastComputedColumnOffset_ = partialColumnOffset;
return partialColumnOffset;
};
// We won't add an entry to |longLineColumnInfo_| for lines where the maximum
// column has offset less than this value. The most common (non-minified)
@ -677,14 +680,14 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
// not *always* worst-case.)
UnitsType unitsType;
if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) {
MOZ_ASSERT((*lastChunkVectorForLine_)[0].column() ==
JS::ColumnNumberZeroOrigin::zero());
MOZ_ASSERT((*lastChunkVectorForLine_)[0].columnOffset() ==
JS::ColumnNumberUnsignedOffset::zero());
unitsType = (*lastChunkVectorForLine_)[0].unitsType();
} else {
unitsType = UnitsType::PossiblyMultiUnit;
}
return ColumnFromPartial(start, JS::ColumnNumberZeroOrigin::zero(),
return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
unitsType);
}
@ -698,7 +701,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
if (!longLineColumnInfo_.add(ptr, line, Vector<ChunkInfo>(fc))) {
// In case of OOM, just count columns from the start of the line.
fc->recoverFromOutOfMemory();
return ColumnFromPartial(start, JS::ColumnNumberZeroOrigin::zero(),
return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
UnitsType::PossiblyMultiUnit);
}
}
@ -734,7 +737,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
};
uint32_t partialOffset;
JS::ColumnNumberZeroOrigin partialColumn;
JS::ColumnNumberUnsignedOffset partialColumnOffset;
UnitsType unitsType;
auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
@ -742,7 +745,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
// We've computed the chunk |offset| resides in. Compute the column number
// offset from the chunk.
partialOffset = RetractedOffsetOfChunk(chunkIndex);
partialColumn = (*lastChunkVectorForLine_)[chunkIndex].column();
partialColumnOffset = (*lastChunkVectorForLine_)[chunkIndex].columnOffset();
// This is exact if |chunkIndex| isn't the last chunk.
unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType();
@ -759,16 +762,17 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
// also a suitable partial start point if we must recover from OOM.)
if (entriesLen > 0) {
partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1].column();
partialColumnOffset =
(*lastChunkVectorForLine_)[entriesLen - 1].columnOffset();
} else {
partialOffset = start;
partialColumn = JS::ColumnNumberZeroOrigin::zero();
partialColumnOffset = JS::ColumnNumberUnsignedOffset::zero();
}
if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
// As earlier, just start from the greatest offset/column in case of OOM.
fc->recoverFromOutOfMemory();
return ColumnFromPartial(partialOffset, partialColumn,
return OffsetFromPartial(partialOffset, partialColumnOffset,
UnitsType::PossiblyMultiUnit);
}
@ -777,8 +781,9 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
// The vector always begins with the column of the line start, i.e. zero,
// with chunk units pessimally assumed not single-unit.
if (entriesLen == 0) {
lastChunkVectorForLine_->infallibleAppend(ChunkInfo(
JS::ColumnNumberZeroOrigin::zero(), UnitsType::PossiblyMultiUnit));
lastChunkVectorForLine_->infallibleAppend(
ChunkInfo(JS::ColumnNumberUnsignedOffset::zero(),
UnitsType::PossiblyMultiUnit));
entriesLen++;
}
@ -813,10 +818,10 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
}
partialOffset += numUnits;
partialColumn += JS::ColumnNumberOffset(numUTF16CodeUnits);
partialColumnOffset += JS::ColumnNumberUnsignedOffset(numUTF16CodeUnits);
lastChunkVectorForLine_->infallibleEmplaceBack(
partialColumn, UnitsType::PossiblyMultiUnit);
partialColumnOffset, UnitsType::PossiblyMultiUnit);
} while (entriesLen < chunkIndex + 1);
// We're at a spot in the current final chunk, and final chunks never have
@ -824,7 +829,7 @@ JS::ColumnNumberZeroOrigin TokenStreamAnyChars::computePartialColumnForUTF8(
unitsType = UnitsType::PossiblyMultiUnit;
}
return ColumnFromPartial(partialOffset, partialColumn, unitsType);
return OffsetFromPartial(partialOffset, partialColumnOffset, unitsType);
}
template <typename Unit, class AnyCharsAccess>
@ -835,19 +840,20 @@ GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
const TokenStreamAnyChars& anyChars = anyCharsAccess();
JS::ColumnNumberZeroOrigin column =
anyChars.computePartialColumn(lineToken, offset, this->sourceUnits);
JS::ColumnNumberUnsignedOffset columnOffset =
anyChars.computeColumnOffset(lineToken, offset, this->sourceUnits);
if (lineToken.isFirstLine()) {
if (column.zeroOriginValue() > JS::LimitedColumnNumberZeroOrigin::Limit) {
return JS::LimitedColumnNumberZeroOrigin::limit();
}
uint32_t firstLineOffset = anyChars.options_.column.zeroOriginValue();
column += JS::ColumnNumberOffset(firstLineOffset);
if (!lineToken.isFirstLine()) {
return JS::LimitedColumnNumberZeroOrigin::fromUnlimited(
JS::ColumnNumberZeroOrigin::zero() + columnOffset);
}
return JS::LimitedColumnNumberZeroOrigin::fromUnlimited(column);
if (columnOffset.value() > JS::LimitedColumnNumberZeroOrigin::Limit) {
return JS::LimitedColumnNumberZeroOrigin::limit();
}
return JS::LimitedColumnNumberZeroOrigin::fromUnlimited(
anyChars.options_.column + columnOffset);
}
template <typename Unit, class AnyCharsAccess>

Просмотреть файл

@ -205,7 +205,7 @@
#include "frontend/Token.h"
#include "frontend/TokenKind.h"
#include "js/CharacterEncoding.h" // JS::ConstUTF8CharsZ
#include "js/ColumnNumber.h" // JS::LimitedColumnNumberZeroOrigin, JS::ColumnNumberZeroOrigin, JS::ColumnNumberOneOrigin
#include "js/ColumnNumber.h" // JS::LimitedColumnNumberZeroOrigin, JS::ColumnNumberOneOrigin, JS::ColumnNumberUnsignedOffset
#include "js/CompileOptions.h"
#include "js/friend/ErrorMessages.h" // JSMSG_*
#include "js/HashTable.h" // js::HashMap
@ -506,21 +506,22 @@ enum class UnitsType : unsigned char {
class ChunkInfo {
private:
// Column number in UTF-16 code units (0-origin).
// Column number offset in UTF-16 code units.
// Store everything in |unsigned char|s so everything packs.
unsigned char column_[sizeof(uint32_t)];
unsigned char columnOffset_[sizeof(uint32_t)];
unsigned char unitsType_;
public:
ChunkInfo(JS::ColumnNumberZeroOrigin col, UnitsType type)
ChunkInfo(JS::ColumnNumberUnsignedOffset offset, UnitsType type)
: unitsType_(static_cast<unsigned char>(type)) {
memcpy(column_, col.addressOfValueForTranscode(), sizeof(col));
memcpy(columnOffset_, offset.addressOfValueForTranscode(), sizeof(offset));
}
JS::ColumnNumberZeroOrigin column() const {
JS::ColumnNumberZeroOrigin col;
memcpy(col.addressOfValueForTranscode(), column_, sizeof(uint32_t));
return col;
JS::ColumnNumberUnsignedOffset columnOffset() const {
JS::ColumnNumberUnsignedOffset offset;
memcpy(offset.addressOfValueForTranscode(), columnOffset_,
sizeof(uint32_t));
return offset;
}
UnitsType unitsType() const {
@ -576,7 +577,7 @@ class TokenStreamAnyChars : public TokenStreamShared {
/**
* A map of (line number => sequence of the column numbers at
* |ColumnChunkLength|-unit boundaries rewound [if needed] to the nearest code
* point boundary). (|TokenStreamAnyChars::computePartialColumn| is the sole
* point boundary). (|TokenStreamAnyChars::computeColumnOffset| is the sole
* user of |ColumnChunkLength| and therefore contains its definition.)
*
* Entries appear in this map only when a column computation of sufficient
@ -622,10 +623,10 @@ class TokenStreamAnyChars : public TokenStreamShared {
mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX;
/**
* The column number for the offset (in code units) of the last column
* computation performed, relative to source start.
* The column number offset, counted from the first column of the line, that
* corresponds to the offset (in code units, relative to source start) of the
* last column computation performed.
*/
mutable JS::ColumnNumberZeroOrigin lastComputedColumn_;
mutable JS::ColumnNumberUnsignedOffset lastComputedColumnOffset_;
// Intra-token fields.
@ -903,14 +904,15 @@ class TokenStreamAnyChars : public TokenStreamShared {
private:
/**
* Compute the "partial" column number in UTF-16 code units of the absolute
* |offset| within source text on the line of |lineToken| (which must have
* been computed from |offset|).
* Compute the column number offset from the 1st code unit in the line in
* UTF-16 code units, for given absolute |offset| within source text on the
* line of |lineToken| (which must have been computed from |offset|).
*
* A partial column number on a line that isn't the first line is just the
* actual column number. But a partial column number on the first line is the
* column number *ignoring the initial line/column of the script*. For
* example, consider this HTML with line/column number keys:
* A column number offset on a line that isn't the first line is just
* the actual column number in 0-origin. But a column number offset
* on the first line is the column number offset from the initial
* line/column of the script. For example, consider this HTML with
* line/column number keys:
*
* 1 2 3
* 0123456789012345678901234 567890
@ -926,15 +928,15 @@ class TokenStreamAnyChars : public TokenStreamShared {
* The script would be compiled specifying initial (line, column) of (3, 10)
* using |JS::ReadOnlyCompileOptions::{lineno,column}|. And the column
* reported by |computeColumn| for the "v" of |var| would be 10. But the
* partial column number of the "v" in |var|, that this function returns,
* column number offset of the "v" in |var|, that this function returns,
* would be 0. On the other hand, the column reported by |computeColumn| and
* the partial column number returned by this function for the "c" in |const|
* the column number offset returned by this function for the "c" in |const|
* would both be 0, because it's not in the first line of source text.
*
* The partial column is with respect *only* to the JavaScript source text as
* SpiderMonkey sees it. In the example, the "&lt;" is converted to "<" by
* the browser before SpiderMonkey would see it. So the partial column of the
* "4" in the inequality would be 16, not 19.
* The column number offset is with respect *only* to the JavaScript source
* text as SpiderMonkey sees it. In the example, the "&lt;" is converted to
* "<" by the browser before SpiderMonkey would see it. So the column number
* offset of the "4" in the inequality would be 16, not 19.
*
* UTF-16 code units are not all equal length in UTF-8 source, so counting
* requires *some* kind of linear-time counting from the start of the line.
@ -950,12 +952,12 @@ class TokenStreamAnyChars : public TokenStreamShared {
* And this is the best place to do that.
*/
template <typename Unit>
JS::ColumnNumberZeroOrigin computePartialColumn(
JS::ColumnNumberUnsignedOffset computeColumnOffset(
const LineToken lineToken, const uint32_t offset,
const SourceUnits<Unit>& sourceUnits) const;
template <typename Unit>
JS::ColumnNumberZeroOrigin computePartialColumnForUTF8(
JS::ColumnNumberUnsignedOffset computeColumnOffsetForUTF8(
const LineToken lineToken, const uint32_t offset, const uint32_t start,
const uint32_t offsetInLine, const SourceUnits<Unit>& sourceUnits) const;