Bug 1779849: Implement CheckCharacterInRangeArray r=mgaudet

IsCharacterInRangeArray is translated from the version in `imported/regexp-macro-assembler.h`, with more comments and adjustments to make callWithABI work.

Differential Revision: https://phabricator.services.mozilla.com/D152905
This commit is contained in:
Iain Ireland 2022-08-03 18:06:49 +00:00
Родитель ab543ce05d
Коммит 3e3cb10c55
7 изменённых файлов: 149 добавлений и 6 удалений

Просмотреть файл

@ -786,6 +786,10 @@ uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
substring1, substring2, byteLength);
}
// Free-function entry point that forwards to the static
// SMRegExpMacroAssembler::IsCharacterInRangeArray and returns its answer
// unchanged.
bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges) {
  const bool inRangeArray =
      SMRegExpMacroAssembler::IsCharacterInRangeArray(c, ranges);
  return inRangeArray;
}
#ifdef DEBUG
bool IsolateShouldSimulateInterrupt(Isolate* isolate) {
return isolate->shouldSimulateInterrupt_ != 0;

Просмотреть файл

@ -77,6 +77,8 @@ uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1,
uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
const char16_t* substring2,
size_t byteLength);
bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges);
#ifdef DEBUG
bool IsolateShouldSimulateInterrupt(Isolate* isolate);
void IsolateSetShouldSimulateInterrupt(Isolate* isolate);

Просмотреть файл

@ -239,6 +239,111 @@ void SMRegExpMacroAssembler::CheckCharacterNotInRange(uc16 from, uc16 to,
CheckCharacterInRangeImpl(from, to, on_not_in_range, Assembler::Above);
}
/* static */
bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c,
ByteArrayData* ranges) {
js::AutoUnsafeCallWithABI unsafe;
MOZ_ASSERT(ranges->length % sizeof(uint16_t) == 0);
uint32_t length = ranges->length / sizeof(uint16_t);
MOZ_ASSERT(length > 0);
// Fast paths.
if (c < ranges->get_uint16(0)) {
// |c| is lower than the start of the first range.
// It is not in the range array.
return false;
}
if (c >= ranges->get_uint16(length - 1)) {
// |c| is higher than the last entry. If the table contains an odd
// number of entries, the last range is open-ended, so |c| is in
// the range array iff |length| is odd.
return (length % 2) != 0;
}
// |ranges| is stored as an interval list: an ordered list of
// starting points, where every even index marks the beginning of a
// range of characters that are included, and every odd index marks
// the beginning of a range of characters that are excluded. For
// example, the set [1,2,3,7,8,9] would be represented as the
// range array [1,4,7,10]. If |ranges| has an odd number of entries,
// the last included range is open-ended (so the set containing
// every character would be represented as [0]).
//
// Because of the symmetry between included and excluded ranges, we
// can do a binary search for the index in |ranges| with the value
// closest to but not exceeding |c|. If that index is even, |c| is
// in an included range. If that index is odd, |c| is in an excluded
// range.
uint32_t lower = 0;
uint32_t upper = length;
uint32_t mid = 0;
do {
mid = lower + (upper - lower) / 2;
const base::uc16 elem = ranges->get_uint16(mid);
if (c < elem) {
upper = mid;
} else if (c > elem) {
lower = mid + 1;
} else {
break;
}
} while (lower < upper);
uint32_t rangeIndex = c < ranges->get_uint16(mid) ? mid - 1 : mid;
// Included ranges start at even indices and end at odd indices.
return rangeIndex % 2 == 0;
}
// Emits, through |masm_|, an ABI call to
// js::irregexp::IsCharacterInRangeArray with |current_character_| as the
// first argument and a pointer to the range array built for |ranges| as
// the second. The boolean result is stored into |temp1_|; the two
// Check*InRangeArray methods in this file branch on that register.
// Also hands the range array's backing data to AddTable the first time
// ownership can be taken.
void SMRegExpMacroAssembler::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
Handle<ByteArray> rangeArray = GetOrAddRangeArray(ranges);
// Load the raw ByteArrayData pointer into temp0_; it is passed as the
// second ABI argument below.
masm_.movePtr(ImmPtr(rangeArray->inner()), temp0_);
// Save volatile regs. Temp regs don't need to be saved.
LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
volatileRegs.takeUnchecked(temp0_);
volatileRegs.takeUnchecked(temp1_);
// temp2_ is only removed from the save set when it names a real register.
if (temp2_ != js::jit::InvalidReg) {
volatileRegs.takeUnchecked(temp2_);
}
masm_.PushRegsInMask(volatileRegs);
// Signature of the C++ helper being called.
using Fn = bool (*)(uint32_t, ByteArrayData*);
// NOTE(review): temp1_ is presumably used as a scratch register by
// setupUnalignedABICall -- confirm. It is overwritten with the call's
// result below either way.
masm_.setupUnalignedABICall(temp1_);
masm_.passABIArg(current_character_);
masm_.passABIArg(temp0_);
masm_.callWithABI<Fn, ::js::irregexp::IsCharacterInRangeArray>();
// Capture the result in temp1_ before the saved registers are restored;
// temp1_ was excluded from |volatileRegs|, so the pop does not touch it.
masm_.storeCallBoolResult(temp1_);
masm_.PopRegsInMask(volatileRegs);
// GetOrAddRangeArray caches previously seen range arrays to reduce
// memory usage, so this may not be the first time we've seen this
// range array. We only need to transfer ownership from the
// HandleScope to the |tables_| vector once.
PseudoHandle<ByteArrayData> rawRangeArray =
rangeArray->maybeTakeOwnership(isolate());
// A null handle means ownership could not be taken this time (per the
// comment above, it was already transferred), so there is nothing to add.
if (rawRangeArray) {
AddTable(std::move(rawRangeArray));
}
}
// Emits a range-array membership test for the current character, followed
// by a branch taken when the result in |temp1_| is non-zero (i.e. the
// character is in |ranges|). The branch target is whatever
// LabelOrBacktrack(on_in_range) selects.
// Always returns true.
// NOTE(review): the return value presumably tells the caller that this
// operation is implemented by this assembler -- confirm against the base
// class contract.
bool SMRegExpMacroAssembler::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
// Leaves the boolean result of the membership check in temp1_.
CallIsCharacterInRangeArray(ranges);
masm_.branchTest32(Assembler::NonZero, temp1_, temp1_,
LabelOrBacktrack(on_in_range));
return true;
}
// Emits a range-array membership test for the current character, followed
// by a branch taken when the result in |temp1_| is zero (i.e. the
// character is not in |ranges|). The branch target is whatever
// LabelOrBacktrack(on_not_in_range) selects.
// Always returns true.
// NOTE(review): the return value presumably tells the caller that this
// operation is implemented by this assembler -- confirm against the base
// class contract.
bool SMRegExpMacroAssembler::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
// Leaves the boolean result of the membership check in temp1_.
CallIsCharacterInRangeArray(ranges);
masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
LabelOrBacktrack(on_not_in_range));
return true;
}
void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
Label* on_bit_set) {
// Claim ownership of the ByteArray from the current HandleScope.

Просмотреть файл

@ -71,6 +71,10 @@ class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
Label* on_not_in_range);
virtual bool CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range);
virtual bool CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range);
virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
@ -132,6 +136,7 @@ class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
bool unicode, Label* on_no_match,
bool ignore_case);
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
@ -156,6 +161,7 @@ class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
const char16_t* substring2,
size_t byteLength);
static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges);
private:
inline int char_size() { return static_cast<int>(mode_); }

Просмотреть файл

@ -113,6 +113,13 @@ void* Isolate::allocatePseudoHandle(size_t bytes) {
// Strict variant of maybeTakeOwnership: performs the same lookup for
// |ptr| and additionally asserts that a non-null handle came back.
template <typename T>
PseudoHandle<T> Isolate::takeOwnership(void* ptr) {
  PseudoHandle<T> owned = maybeTakeOwnership<T>(ptr);
  MOZ_ASSERT(owned);
  return owned;
}
template <typename T>
PseudoHandle<T> Isolate::maybeTakeOwnership(void* ptr) {
for (auto iter = uniquePtrArena_.IterFromLast(); !iter.Done(); iter.Prev()) {
auto& entry = iter.Get();
if (entry.get() == ptr) {
@ -121,13 +128,19 @@ PseudoHandle<T> Isolate::takeOwnership(void* ptr) {
return result;
}
}
MOZ_CRASH("Tried to take ownership of pseudohandle that is not in the arena");
return PseudoHandle<T>();
}
// Asks |isolate| to hand over ownership of this array's backing data and
// then clears this object's stored pointer, whether or not a handle was
// obtained. The returned handle may be null.
PseudoHandle<ByteArrayData> ByteArray::maybeTakeOwnership(Isolate* isolate) {
  // Read the private pointer before it is reset below.
  void* rawData = value().toPrivate();
  PseudoHandle<ByteArrayData> owned =
      isolate->maybeTakeOwnership<ByteArrayData>(rawData);
  setValue(JS::PrivateValue(nullptr));
  return owned;
}
// Takes ownership of this array's backing ByteArrayData from |isolate|
// and returns the owning handle.
// NOTE(review): this span comes from a rendered diff whose +/- markers
// were stripped. It appears to interleave the pre-change body (a direct
// isolate->takeOwnership call followed by setValue) with the post-change
// body (delegation to maybeTakeOwnership plus an assertion). As shown,
// |result| is declared twice -- confirm against the actual file before
// relying on this text.
PseudoHandle<ByteArrayData> ByteArray::takeOwnership(Isolate* isolate) {
PseudoHandle<ByteArrayData> result =
isolate->takeOwnership<ByteArrayData>(value().toPrivate());
setValue(JS::PrivateValue(nullptr));
PseudoHandle<ByteArrayData> result = maybeTakeOwnership(isolate);
MOZ_ASSERT(result);
return result;
}

Просмотреть файл

@ -633,12 +633,15 @@ inline uint8_t* ByteArrayData::data() {
// A fixed-size array of bytes.
class ByteArray : public HeapObject {
protected:
ByteArrayData* inner() const {
return static_cast<ByteArrayData*>(value().toPrivate());
}
public:
PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
PseudoHandle<ByteArrayData> maybeTakeOwnership(Isolate* isolate);
byte get(uint32_t index) { return inner()->get(index); }
void set(uint32_t index, byte val) { inner()->set(index, val); }
uint16_t get_uint16(uint32_t index) { return inner()->get_uint16(index); }
@ -654,6 +657,10 @@ class ByteArray : public HeapObject {
b.setValue(object.value());
return b;
}
bool IsByteArray() const { return true; }
friend class SMRegExpMacroAssembler;
};
// Like Handles in SM, V8 handles are references to marked pointers.
@ -678,7 +685,9 @@ class ByteArray : public HeapObject {
// managed by the GC but provide the same API to irregexp. The "root"
// of a pseudohandle is a unique pointer living in a second arena. If
// the allocated object should outlive the HandleScope, it must be
// manually moved out of the arena using takeOwnership.
// manually moved out of the arena using maybeTakeOwnership.
// (If maybeTakeOwnership is called multiple times, it will return
// a null pointer on subsequent calls.)
class MOZ_STACK_CLASS HandleScope {
public:
@ -1097,6 +1106,8 @@ class Isolate {
public:
template <typename T>
PseudoHandle<T> takeOwnership(void* ptr);
template <typename T>
PseudoHandle<T> maybeTakeOwnership(void* ptr);
uint32_t liveHandles() const { return handleArena_.Length(); }
uint32_t livePseudoHandles() const { return uniquePtrArena_.Length(); }

Просмотреть файл

@ -23,7 +23,8 @@
#include "irregexp/RegExpAPI.h"
// js::irregexp::CaseInsensitiveCompareNonUnicode,
// js::irregexp::CaseInsensitiveCompareUnicode,
// js::irregexp::GrowBacktrackStack
// js::irregexp::GrowBacktrackStack,
// js::irregexp::IsCharacterInRangeArray
#include "jit/ABIFunctions.h"
#include "jit/Bailouts.h" // js::jit::FinishBailoutToBaseline, js::jit::Bailout,
@ -98,6 +99,7 @@ namespace jit {
_(js::irregexp::CaseInsensitiveCompareNonUnicode) \
_(js::irregexp::CaseInsensitiveCompareUnicode) \
_(js::irregexp::GrowBacktrackStack) \
_(js::irregexp::IsCharacterInRangeArray) \
_(js::jit::AllocateAndInitTypedArrayBuffer) \
_(js::jit::AllocateBigIntNoGC) \
_(js::jit::AllocateFatInlineString) \