зеркало из https://github.com/mozilla/gecko-dev.git
Backed out 9 changesets (bug 1879225) for bustages on regexp-nodes.h. CLOSED TREE

Backed out changeset 589758807f24 (bug 1879225)
Backed out changeset 85a16b24ff70 (bug 1879225)
Backed out changeset e06159bc2d35 (bug 1879225)
Backed out changeset a0e12e764dab (bug 1879225)
Backed out changeset 8efe813ab556 (bug 1879225)
Backed out changeset 4e39e690f2b8 (bug 1879225)
Backed out changeset 9db3b5ee9eb1 (bug 1879225)
Backed out changeset 6a96960cbfb0 (bug 1879225)
Backed out changeset 1b309a9c237b (bug 1879225)
This commit is contained in:
Родитель
d7d2b179b5
Коммит
cbac415c3c
|
@ -12,7 +12,6 @@
|
|||
#include "mozilla/Assertions.h" // MOZ_ASSERT
|
||||
#include "mozilla/Attributes.h" // MOZ_IMPLICIT
|
||||
|
||||
#include <ostream> // ostream
|
||||
#include <stdint.h> // uint8_t
|
||||
|
||||
namespace JS {
|
||||
|
@ -130,15 +129,6 @@ class RegExpFlags {
|
|||
explicit operator bool() const { return flags_ != 0; }
|
||||
|
||||
Flag value() const { return flags_; }
|
||||
constexpr operator Flag() const { return flags_; }
|
||||
|
||||
void set(Flag flags, bool value) {
|
||||
if (value) {
|
||||
flags_ |= flags;
|
||||
} else {
|
||||
flags_ &= ~flags;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline RegExpFlags& operator&=(RegExpFlags& flags, RegExpFlags::Flag flag) {
|
||||
|
@ -168,39 +158,6 @@ inline RegExpFlags operator|(const RegExpFlags& lhs, const RegExpFlags& rhs) {
|
|||
return result;
|
||||
}
|
||||
|
||||
inline bool MaybeParseRegExpFlag(char c, RegExpFlags::Flag* flag) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
*flag = RegExpFlag::HasIndices;
|
||||
return true;
|
||||
case 'g':
|
||||
*flag = RegExpFlag::Global;
|
||||
return true;
|
||||
case 'i':
|
||||
*flag = RegExpFlag::IgnoreCase;
|
||||
return true;
|
||||
case 'm':
|
||||
*flag = RegExpFlag::Multiline;
|
||||
return true;
|
||||
case 's':
|
||||
*flag = RegExpFlag::DotAll;
|
||||
return true;
|
||||
case 'u':
|
||||
*flag = RegExpFlag::Unicode;
|
||||
return true;
|
||||
case 'v':
|
||||
*flag = RegExpFlag::UnicodeSets;
|
||||
return true;
|
||||
case 'y':
|
||||
*flag = RegExpFlag::Sticky;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, RegExpFlags flags);
|
||||
|
||||
} // namespace JS
|
||||
|
||||
#endif // js_RegExpFlags_h
|
||||
|
|
|
@ -383,9 +383,6 @@ enum JSWhyMagic {
|
|||
/** arguments object can't be created because environment is dead. */
|
||||
JS_MISSING_ARGUMENTS,
|
||||
|
||||
/** exception value thrown when interrupting irregexp */
|
||||
JS_INTERRUPT_REGEXP,
|
||||
|
||||
/** for local use */
|
||||
JS_GENERIC_MAGIC,
|
||||
|
||||
|
|
|
@ -632,7 +632,7 @@ enum class AssembleResult {
|
|||
// RegExpShared.
|
||||
ByteArray bytecode =
|
||||
v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
|
||||
uint32_t length = bytecode->length();
|
||||
uint32_t length = bytecode->length;
|
||||
re->setByteCode(bytecode.release(), isLatin1);
|
||||
js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
|
||||
}
|
||||
|
@ -773,7 +773,7 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
|
|||
bool isLatin1 = input->hasLatin1Chars();
|
||||
|
||||
SampleCharacters(input, compiler);
|
||||
data.node = compiler.PreprocessRegExp(&data, isLatin1);
|
||||
data.node = compiler.PreprocessRegExp(&data, flags, isLatin1);
|
||||
data.error = AnalyzeRegExp(cx->isolate, isLatin1, flags, data.node);
|
||||
if (data.error != RegExpError::kNone) {
|
||||
MOZ_ASSERT(data.error == RegExpError::kAnalysisStackOverflow);
|
||||
|
|
|
@ -247,8 +247,8 @@ void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from,
|
|||
bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c,
|
||||
ByteArrayData* ranges) {
|
||||
js::AutoUnsafeCallWithABI unsafe;
|
||||
MOZ_ASSERT(ranges->length() % sizeof(uint16_t) == 0);
|
||||
uint32_t length = ranges->length() / sizeof(uint16_t);
|
||||
MOZ_ASSERT(ranges->length % sizeof(uint16_t) == 0);
|
||||
uint32_t length = ranges->length / sizeof(uint16_t);
|
||||
MOZ_ASSERT(length > 0);
|
||||
|
||||
// Fast paths.
|
||||
|
|
|
@ -227,13 +227,13 @@ Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) {
|
|||
|
||||
js::AutoEnterOOMUnsafeRegion oomUnsafe;
|
||||
|
||||
size_t alloc_size = sizeof(ByteArrayData) + length;
|
||||
size_t alloc_size = sizeof(uint32_t) + length;
|
||||
ByteArrayData* data =
|
||||
static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size));
|
||||
if (!data) {
|
||||
oomUnsafe.crash("Irregexp NewByteArray");
|
||||
}
|
||||
new (data) ByteArrayData(length);
|
||||
data->length = length;
|
||||
|
||||
return Handle<ByteArray>(JS::PrivateValue(data), this);
|
||||
}
|
||||
|
@ -261,7 +261,7 @@ Handle<FixedIntegerArray<T>> Isolate::NewFixedIntegerArray(uint32_t length) {
|
|||
if (!data) {
|
||||
oomUnsafe.crash("Irregexp NewFixedIntegerArray");
|
||||
}
|
||||
new (data) ByteArrayData(rawLength);
|
||||
data->length = rawLength;
|
||||
|
||||
return Handle<FixedIntegerArray<T>>(JS::PrivateValue(data), this);
|
||||
}
|
||||
|
|
|
@ -586,6 +586,15 @@ class Object {
|
|||
// IsCharacterInRangeArray in regexp-macro-assembler.cc.
|
||||
Object(uintptr_t raw) : asBits_(raw) { MOZ_CRASH("unused"); }
|
||||
|
||||
// Used in regexp-interpreter.cc to check the return value of
|
||||
// isolate->stack_guard()->HandleInterrupts(). We want to handle
|
||||
// interrupts in the caller, so we always return false from
|
||||
// HandleInterrupts and true here.
|
||||
inline bool IsException(Isolate*) const {
|
||||
MOZ_ASSERT(!value().toBoolean());
|
||||
return true;
|
||||
}
|
||||
|
||||
JS::Value value() const { return JS::Value::fromRawBits(asBits_); }
|
||||
|
||||
inline static Object cast(Object object) { return object; }
|
||||
|
@ -595,14 +604,6 @@ class Object {
|
|||
uint64_t asBits_;
|
||||
} JS_HAZ_GC_POINTER;
|
||||
|
||||
// Used in regexp-interpreter.cc to check the return value of
|
||||
// isolate->stack_guard()->HandleInterrupts(). We want to handle
|
||||
// interrupts in the caller, so we return a magic value from
|
||||
// HandleInterrupts and check for it here.
|
||||
inline bool IsException(Object obj, Isolate*) {
|
||||
return obj.value().isMagic(JS_INTERRUPT_REGEXP);
|
||||
}
|
||||
|
||||
class Smi : public Object {
|
||||
public:
|
||||
static Smi FromInt(int32_t value) {
|
||||
|
@ -625,27 +626,6 @@ class HeapObject : public Object {
|
|||
}
|
||||
};
|
||||
|
||||
// V8's values use low-bit tagging. If the LSB is 0, it's a small
|
||||
// integer. If the LSB is 1, it's a pointer to some GC thing. In V8,
|
||||
// this wrapper class is used to represent a pointer that has the low
|
||||
// bit set, or a small integer that has been shifted left by one
|
||||
// bit. We don't use the same tagging system, so all we need is a
|
||||
// transparent wrapper that automatically converts to/from the wrapped
|
||||
// type.
|
||||
template <typename T>
|
||||
class Tagged {
|
||||
public:
|
||||
Tagged() {}
|
||||
MOZ_IMPLICIT Tagged(const T& value) : value_(value) {}
|
||||
MOZ_IMPLICIT Tagged(T&& value) : value_(std::move(value)) {}
|
||||
|
||||
T* operator->() { return &value_; }
|
||||
constexpr operator T() const { return value_; }
|
||||
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
|
||||
// A fixed-size array with Objects (aka Values) as element types.
|
||||
// Implemented using the dense elements of an ArrayObject.
|
||||
// Used for named captures.
|
||||
|
@ -688,13 +668,13 @@ T* ByteArrayData::typedData() {
|
|||
|
||||
template <typename T>
|
||||
T ByteArrayData::getTyped(uint32_t index) {
|
||||
MOZ_ASSERT(index < length() / sizeof(T));
|
||||
MOZ_ASSERT(index < length / sizeof(T));
|
||||
return typedData<T>()[index];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ByteArrayData::setTyped(uint32_t index, T value) {
|
||||
MOZ_ASSERT(index < length() / sizeof(T));
|
||||
MOZ_ASSERT(index < length / sizeof(T));
|
||||
typedData<T>()[index] = value;
|
||||
}
|
||||
|
||||
|
@ -704,7 +684,6 @@ class ByteArray : public HeapObject {
|
|||
ByteArrayData* inner() const {
|
||||
return static_cast<ByteArrayData*>(value().toPrivate());
|
||||
}
|
||||
friend bool IsByteArray(Object obj);
|
||||
|
||||
public:
|
||||
PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
|
||||
|
@ -713,8 +692,8 @@ class ByteArray : public HeapObject {
|
|||
uint8_t get(uint32_t index) { return inner()->get(index); }
|
||||
void set(uint32_t index, uint8_t val) { inner()->set(index, val); }
|
||||
|
||||
uint32_t length() const { return inner()->length(); }
|
||||
uint8_t* begin() { return inner()->data(); }
|
||||
uint32_t length() const { return inner()->length; }
|
||||
uint8_t* GetDataStartAddress() { return inner()->data(); }
|
||||
|
||||
static ByteArray cast(Object object) {
|
||||
ByteArray b;
|
||||
|
@ -722,17 +701,11 @@ class ByteArray : public HeapObject {
|
|||
return b;
|
||||
}
|
||||
|
||||
bool IsByteArray() const { return true; }
|
||||
|
||||
friend class SMRegExpMacroAssembler;
|
||||
};
|
||||
|
||||
// This is only used in assertions. In debug builds, we put a magic value
|
||||
// in the header of each ByteArrayData, and assert here that it matches.
|
||||
inline bool IsByteArray(Object obj) {
|
||||
MOZ_ASSERT(ByteArray::cast(obj).inner()->magic() ==
|
||||
ByteArrayData::ExpectedMagic);
|
||||
return true;
|
||||
}
|
||||
|
||||
// This is a convenience class used in V8 for treating a ByteArray as an array
|
||||
// of fixed-size integers. This version supports integral types up to 32 bits.
|
||||
template <typename T>
|
||||
|
@ -1057,7 +1030,6 @@ class JSRegExp : public HeapObject {
|
|||
};
|
||||
|
||||
using RegExpFlags = JS::RegExpFlags;
|
||||
using RegExpFlag = JS::RegExpFlags::Flag;
|
||||
|
||||
inline bool IsUnicode(RegExpFlags flags) { return flags.unicode(); }
|
||||
inline bool IsGlobal(RegExpFlags flags) { return flags.global(); }
|
||||
|
@ -1070,22 +1042,6 @@ inline bool IsEitherUnicode(RegExpFlags flags) {
|
|||
return flags.unicode() || flags.unicodeSets();
|
||||
}
|
||||
|
||||
inline base::Optional<RegExpFlag> TryRegExpFlagFromChar(char c) {
|
||||
RegExpFlag flag;
|
||||
|
||||
// The parser only calls this after verifying that it's a supported flag.
|
||||
MOZ_ALWAYS_TRUE(JS::MaybeParseRegExpFlag(c, &flag));
|
||||
|
||||
return base::Optional(flag);
|
||||
}
|
||||
|
||||
inline bool operator==(const RegExpFlags& lhs, const int& rhs) {
|
||||
return lhs.value() == rhs;
|
||||
}
|
||||
inline bool operator!=(const RegExpFlags& lhs, const int& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
class Histogram {
|
||||
public:
|
||||
inline void AddSample(int sample) {}
|
||||
|
@ -1170,11 +1126,9 @@ class Isolate {
|
|||
|
||||
// This is called from inside no-GC code. V8 runs the interrupt
|
||||
// inside the no-GC code and then "manually relocates unhandlified
|
||||
// references" afterwards. We just return a magic value and let the
|
||||
// caller handle interrupts.
|
||||
Object HandleInterrupts() {
|
||||
return Object(JS::MagicValue(JS_INTERRUPT_REGEXP));
|
||||
}
|
||||
// references" afterwards. We just return false and let the caller
|
||||
// handle interrupts.
|
||||
Object HandleInterrupts() { return Object(JS::BooleanValue(false)); }
|
||||
|
||||
JSContext* cx() const { return cx_; }
|
||||
|
||||
|
|
|
@ -21,17 +21,15 @@ namespace internal {
|
|||
|
||||
class ByteArrayData {
|
||||
public:
|
||||
ByteArrayData(uint32_t length) : length_(length) {}
|
||||
|
||||
uint32_t length() { return length_; };
|
||||
uint32_t length;
|
||||
uint8_t* data();
|
||||
|
||||
uint8_t get(uint32_t index) {
|
||||
MOZ_ASSERT(index < length());
|
||||
MOZ_ASSERT(index < length);
|
||||
return data()[index];
|
||||
}
|
||||
void set(uint32_t index, uint8_t val) {
|
||||
MOZ_ASSERT(index < length());
|
||||
MOZ_ASSERT(index < length);
|
||||
data()[index] = val;
|
||||
}
|
||||
|
||||
|
@ -41,19 +39,9 @@ class ByteArrayData {
|
|||
template <typename T>
|
||||
void setTyped(uint32_t index, T value);
|
||||
|
||||
#ifdef DEBUG
|
||||
const static uint32_t ExpectedMagic = 0x12344321;
|
||||
uint32_t magic() const { return magic_; }
|
||||
|
||||
private:
|
||||
uint32_t magic_ = ExpectedMagic;
|
||||
#endif
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T* typedData();
|
||||
|
||||
uint32_t length_;
|
||||
};
|
||||
|
||||
class Isolate;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <sstream>
|
||||
|
||||
#include "irregexp/imported/special-case.h"
|
||||
#include "unicode/usetiter.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
@ -125,6 +126,52 @@ void PrintSpecial(std::ofstream& out) {
|
|||
PrintSet(out, "SpecialAddSet", special_add);
|
||||
}
|
||||
|
||||
void PrintUnicodeSpecial(std::ofstream& out) {
|
||||
icu::UnicodeSet non_simple_folding;
|
||||
icu::UnicodeSet current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Look at all characters except white spaces.
|
||||
icu::UnicodeSet interestingCP(u"[^[:White_Space:]]", status);
|
||||
CHECK_EQ(status, U_ZERO_ERROR);
|
||||
icu::UnicodeSetIterator iter(interestingCP);
|
||||
while (iter.next()) {
|
||||
UChar32 c = iter.getCodepoint();
|
||||
current.set(c, c);
|
||||
current.closeOver(USET_CASE_INSENSITIVE).removeAllStrings();
|
||||
CHECK(!current.isBogus());
|
||||
// Remove characters from the closeover that have a simple case folding.
|
||||
icu::UnicodeSet toRemove;
|
||||
icu::UnicodeSetIterator closeOverIter(current);
|
||||
while (closeOverIter.next()) {
|
||||
UChar32 closeOverChar = closeOverIter.getCodepoint();
|
||||
UChar32 closeOverSCF = u_foldCase(closeOverChar, U_FOLD_CASE_DEFAULT);
|
||||
if (closeOverChar != closeOverSCF) {
|
||||
toRemove.add(closeOverChar);
|
||||
}
|
||||
}
|
||||
CHECK(!toRemove.isBogus());
|
||||
current.removeAll(toRemove);
|
||||
|
||||
// The current character and its simple case folding are also always OK.
|
||||
UChar32 scf = u_foldCase(c, U_FOLD_CASE_DEFAULT);
|
||||
current.remove(c);
|
||||
current.remove(scf);
|
||||
|
||||
// If there are any characters remaining, they were added due to full case
|
||||
// foldings and shouldn't match the current character according to the spec.
|
||||
if (!current.isEmpty()) {
|
||||
// Ensure that the character doesn't have a simple case folding.
|
||||
// Otherwise the current approach of simply removing the character from
|
||||
// the set before calling closeOver won't work.
|
||||
CHECK_EQ(c, scf);
|
||||
non_simple_folding.add(c);
|
||||
}
|
||||
}
|
||||
CHECK(!non_simple_folding.isBogus());
|
||||
|
||||
PrintSet(out, "UnicodeNonSimpleCloseOverSet", non_simple_folding);
|
||||
}
|
||||
|
||||
void WriteHeader(const char* header_filename) {
|
||||
std::ofstream out(header_filename);
|
||||
out << std::hex << std::setfill('0') << std::setw(4);
|
||||
|
@ -145,6 +192,7 @@ void WriteHeader(const char* header_filename) {
|
|||
<< "namespace internal {\n\n";
|
||||
|
||||
PrintSpecial(out);
|
||||
PrintUnicodeSpecial(out);
|
||||
|
||||
out << "\n"
|
||||
<< "} // namespace internal\n"
|
||||
|
|
|
@ -307,7 +307,7 @@ void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
|
|||
}
|
||||
|
||||
void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
|
||||
os_ << "(?" << that->flags() << ": ";
|
||||
os_ << "(?: ";
|
||||
that->body()->Accept(this, data);
|
||||
os_ << ")";
|
||||
return nullptr;
|
||||
|
@ -406,17 +406,10 @@ RegExpClassSetExpression::RegExpClassSetExpression(
|
|||
may_contain_strings_(may_contain_strings),
|
||||
operands_(operands) {
|
||||
DCHECK_NOT_NULL(operands);
|
||||
if (is_negated) {
|
||||
DCHECK(!may_contain_strings_);
|
||||
// We don't know anything about max matches for negated classes.
|
||||
// As there are no strings involved, assume that we can match a unicode
|
||||
// character (2 code points).
|
||||
max_match_ = 2;
|
||||
} else {
|
||||
max_match_ = 0;
|
||||
for (auto op : *operands) {
|
||||
max_match_ = std::max(max_match_, op->max_match());
|
||||
}
|
||||
DCHECK_IMPLIES(is_negated_, !may_contain_strings_);
|
||||
max_match_ = 0;
|
||||
for (auto op : *operands) {
|
||||
max_match_ = std::max(max_match_, op->max_match());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -130,6 +130,12 @@ class CharacterRange {
|
|||
static void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone);
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
// Creates the closeOver of the given UnicodeSet, removing all
|
||||
// characters/strings that can't be derived via simple case folding.
|
||||
static void UnicodeSimpleCloseOver(icu::UnicodeSet& set);
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; }
|
||||
base::uc32 from() const { return from_; }
|
||||
base::uc32 to() const { return to_; }
|
||||
|
@ -305,12 +311,9 @@ class RegExpClassRanges final : public RegExpTree {
|
|||
// the specified ranges.
|
||||
// CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
|
||||
// surrogate and should not be unicode-desugared (crbug.com/641091).
|
||||
// IS_CASE_FOLDED: If case folding is required (/i), it was already
|
||||
// performed on individual ranges and should not be applied again.
|
||||
enum Flag {
|
||||
NEGATED = 1 << 0,
|
||||
CONTAINS_SPLIT_SURROGATE = 1 << 1,
|
||||
IS_CASE_FOLDED = 1 << 2,
|
||||
};
|
||||
using ClassRangesFlags = base::Flags<Flag>;
|
||||
|
||||
|
@ -353,9 +356,6 @@ class RegExpClassRanges final : public RegExpTree {
|
|||
bool contains_split_surrogate() const {
|
||||
return (class_ranges_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
|
||||
}
|
||||
bool is_case_folded() const {
|
||||
return (class_ranges_flags_ & IS_CASE_FOLDED) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
CharacterSet set_;
|
||||
|
@ -626,9 +626,8 @@ class RegExpCapture final : public RegExpTree {
|
|||
|
||||
class RegExpGroup final : public RegExpTree {
|
||||
public:
|
||||
explicit RegExpGroup(RegExpTree* body, RegExpFlags flags)
|
||||
explicit RegExpGroup(RegExpTree* body)
|
||||
: body_(body),
|
||||
flags_(flags),
|
||||
min_match_(body->min_match()),
|
||||
max_match_(body->max_match()) {}
|
||||
|
||||
|
@ -640,11 +639,9 @@ class RegExpGroup final : public RegExpTree {
|
|||
int max_match() override { return max_match_; }
|
||||
Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
|
||||
RegExpTree* body() const { return body_; }
|
||||
RegExpFlags flags() const { return flags_; }
|
||||
|
||||
private:
|
||||
RegExpTree* body_;
|
||||
const RegExpFlags flags_;
|
||||
int min_match_;
|
||||
int max_match_;
|
||||
};
|
||||
|
@ -700,8 +697,9 @@ class RegExpLookaround final : public RegExpTree {
|
|||
|
||||
class RegExpBackReference final : public RegExpTree {
|
||||
public:
|
||||
RegExpBackReference() = default;
|
||||
explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {}
|
||||
explicit RegExpBackReference(RegExpFlags flags) : flags_(flags) {}
|
||||
RegExpBackReference(RegExpCapture* capture, RegExpFlags flags)
|
||||
: capture_(capture), flags_(flags) {}
|
||||
|
||||
DECL_BOILERPLATE(BackReference);
|
||||
|
||||
|
@ -718,6 +716,7 @@ class RegExpBackReference final : public RegExpTree {
|
|||
private:
|
||||
RegExpCapture* capture_ = nullptr;
|
||||
const ZoneVector<base::uc16>* name_ = nullptr;
|
||||
const RegExpFlags flags_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -383,7 +383,7 @@ Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
|
|||
isolate_, zone(), source, buffer_.data(), length(), jump_edges_);
|
||||
} else {
|
||||
array = isolate_->factory()->NewByteArray(length());
|
||||
Copy(array->begin());
|
||||
Copy(array->GetDataStartAddress());
|
||||
}
|
||||
|
||||
return array;
|
||||
|
|
|
@ -1012,13 +1012,13 @@ Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode(
|
|||
RegExpBytecodePeephole peephole(zone, length, jump_edges);
|
||||
bool did_optimize = peephole.OptimizeBytecode(bytecode, length);
|
||||
Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length());
|
||||
peephole.CopyOptimizedBytecode(array->begin());
|
||||
peephole.CopyOptimizedBytecode(array->GetDataStartAddress());
|
||||
|
||||
if (did_optimize && v8_flags.trace_regexp_peephole_optimization) {
|
||||
PrintF("Original Bytecode:\n");
|
||||
RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get());
|
||||
PrintF("Optimized Bytecode:\n");
|
||||
RegExpBytecodeDisassemble(array->begin(), peephole.Length(),
|
||||
RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(),
|
||||
source->ToCString().get());
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
// found in the LICENSE file.
|
||||
|
||||
#include "irregexp/imported/regexp-compiler.h"
|
||||
|
||||
#include "irregexp/imported/regexp.h"
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
|
@ -417,6 +418,27 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
|
|||
|
||||
} // namespace
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
// static
|
||||
void CharacterRange::UnicodeSimpleCloseOver(icu::UnicodeSet& set) {
|
||||
// Remove characters for which closeOver() adds full-case-folding equivalents
|
||||
// because we should work only with simple case folding mappings.
|
||||
icu::UnicodeSet non_simple = icu::UnicodeSet(set);
|
||||
non_simple.retainAll(RegExpCaseFolding::UnicodeNonSimpleCloseOverSet());
|
||||
set.removeAll(non_simple);
|
||||
|
||||
set.closeOver(USET_CASE_INSENSITIVE);
|
||||
// Full case folding maps single characters to multiple characters.
|
||||
// Those are represented as strings in the set. Remove them so that
|
||||
// we end up with only simple and common case mappings.
|
||||
set.removeAllStrings();
|
||||
|
||||
// Add characters that have non-simple case foldings again (they match
|
||||
// themselves).
|
||||
set.addAll(non_simple);
|
||||
}
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
// static
|
||||
void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone) {
|
||||
|
@ -438,7 +460,8 @@ void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
|||
}
|
||||
// Clear the ranges list without freeing the backing store.
|
||||
ranges->Rewind(0);
|
||||
set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
|
||||
|
||||
UnicodeSimpleCloseOver(set);
|
||||
for (int i = 0; i < set.getRangeCount(); i++) {
|
||||
ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone);
|
||||
}
|
||||
|
@ -453,9 +476,7 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
|
|||
Zone* const zone = compiler->zone();
|
||||
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
||||
|
||||
const bool needs_case_folding =
|
||||
NeedsUnicodeCaseEquivalents(compiler->flags()) && !is_case_folded();
|
||||
if (needs_case_folding) {
|
||||
if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
|
||||
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone);
|
||||
}
|
||||
|
||||
|
@ -466,7 +487,8 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
|
|||
|
||||
if (is_negated()) {
|
||||
// With /v, character classes are never negated.
|
||||
// https://tc39.es/ecma262/#sec-compileatom
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom
|
||||
// Atom :: CharacterClass
|
||||
// 4. Assert: cc.[[Invert]] is false.
|
||||
// Instead the complement is created when evaluating the class set.
|
||||
|
@ -539,12 +561,7 @@ RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
|
|||
}
|
||||
}
|
||||
if (!ranges()->is_empty()) {
|
||||
// In unicode sets mode case folding has to be done at precise locations
|
||||
// (e.g. before building complements).
|
||||
// It is therefore the parser's responsibility to case fold (sub-) ranges
|
||||
// before creating ClassSetOperands.
|
||||
alternatives->Add(zone->template New<RegExpClassRanges>(
|
||||
zone, ranges(), RegExpClassRanges::IS_CASE_FOLDED),
|
||||
alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()),
|
||||
zone);
|
||||
}
|
||||
if (empty_string != nullptr) {
|
||||
|
@ -1017,8 +1034,9 @@ namespace {
|
|||
// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
|
||||
RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpAssertion::Type type) {
|
||||
CHECK(NeedsUnicodeCaseEquivalents(compiler->flags()));
|
||||
RegExpAssertion::Type type,
|
||||
RegExpFlags flags) {
|
||||
CHECK(NeedsUnicodeCaseEquivalents(flags));
|
||||
Zone* zone = compiler->zone();
|
||||
ZoneList<CharacterRange>* word_range =
|
||||
zone->New<ZoneList<CharacterRange>>(2, zone);
|
||||
|
@ -1062,13 +1080,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
|||
return AssertionNode::AtStart(on_success);
|
||||
case Type::BOUNDARY:
|
||||
return NeedsUnicodeCaseEquivalents(compiler->flags())
|
||||
? BoundaryAssertionAsLookaround(compiler, on_success,
|
||||
Type::BOUNDARY)
|
||||
? BoundaryAssertionAsLookaround(
|
||||
compiler, on_success, Type::BOUNDARY, compiler->flags())
|
||||
: AssertionNode::AtBoundary(on_success);
|
||||
case Type::NON_BOUNDARY:
|
||||
return NeedsUnicodeCaseEquivalents(compiler->flags())
|
||||
? BoundaryAssertionAsLookaround(compiler, on_success,
|
||||
Type::NON_BOUNDARY)
|
||||
Type::NON_BOUNDARY,
|
||||
compiler->flags())
|
||||
: AssertionNode::AtNonBoundary(on_success);
|
||||
case Type::END_OF_INPUT:
|
||||
return AssertionNode::AtEnd(on_success);
|
||||
|
@ -1113,7 +1132,7 @@ RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
|
|||
RegExpNode* on_success) {
|
||||
return compiler->zone()->New<BackReferenceNode>(
|
||||
RegExpCapture::StartRegister(index()),
|
||||
RegExpCapture::EndRegister(index()), compiler->read_backward(),
|
||||
RegExpCapture::EndRegister(index()), flags_, compiler->read_backward(),
|
||||
on_success);
|
||||
}
|
||||
|
||||
|
@ -1122,40 +1141,9 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
|
|||
return on_success;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class V8_NODISCARD ModifiersScope {
|
||||
public:
|
||||
ModifiersScope(RegExpCompiler* compiler, RegExpFlags flags)
|
||||
: compiler_(compiler), previous_flags_(compiler->flags()) {
|
||||
compiler->set_flags(flags);
|
||||
}
|
||||
~ModifiersScope() { compiler_->set_flags(previous_flags_); }
|
||||
|
||||
private:
|
||||
RegExpCompiler* compiler_;
|
||||
const RegExpFlags previous_flags_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success) {
|
||||
// If no flags are modified, simply convert and return the body.
|
||||
if (flags() == compiler->flags()) {
|
||||
return body_->ToNode(compiler, on_success);
|
||||
}
|
||||
// Reset flags for successor node.
|
||||
const RegExpFlags old_flags = compiler->flags();
|
||||
on_success = ActionNode::ModifyFlags(old_flags, on_success);
|
||||
|
||||
// Convert body using modifier.
|
||||
ModifiersScope modifiers_scope(compiler, flags());
|
||||
RegExpNode* body = body_->ToNode(compiler, on_success);
|
||||
|
||||
// Wrap body into modifier node.
|
||||
RegExpNode* modified_body = ActionNode::ModifyFlags(flags(), body);
|
||||
return modified_body;
|
||||
return body_->ToNode(compiler, on_success);
|
||||
}
|
||||
|
||||
RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
|
||||
|
|
|
@ -707,13 +707,6 @@ ActionNode* ActionNode::EmptyMatchCheck(int start_register,
|
|||
return result;
|
||||
}
|
||||
|
||||
ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
|
||||
ActionNode* result =
|
||||
on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
|
||||
result->data_.u_modify_flags.flags = flags;
|
||||
return result;
|
||||
}
|
||||
|
||||
#define DEFINE_ACCEPT(Type) \
|
||||
void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
|
||||
FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
|
||||
|
@ -1384,9 +1377,6 @@ void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
|||
on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
|
||||
filled_in, not_at_start);
|
||||
} else {
|
||||
if (action_type() == MODIFY_FLAGS) {
|
||||
compiler->set_flags(flags());
|
||||
}
|
||||
on_success()->GetQuickCheckDetails(details, compiler, filled_in,
|
||||
not_at_start);
|
||||
}
|
||||
|
@ -2877,7 +2867,7 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
|
|||
const int kSkipArrayEntry = 0;
|
||||
const int kDontSkipArrayEntry = 1;
|
||||
|
||||
std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
|
||||
std::memset(boolean_skip_table->GetDataStartAddress(), kSkipArrayEntry,
|
||||
boolean_skip_table->length());
|
||||
|
||||
for (int i = max_lookahead; i >= min_lookahead; i--) {
|
||||
|
@ -3464,11 +3454,6 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||
assembler->Backtrack();
|
||||
return;
|
||||
}
|
||||
case MODIFY_FLAGS: {
|
||||
compiler->set_flags(flags());
|
||||
on_success()->Emit(compiler, trace);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -3488,8 +3473,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||
RecursionCheck rc(compiler);
|
||||
|
||||
DCHECK_EQ(start_reg_ + 1, end_reg_);
|
||||
if (IsIgnoreCase(compiler->flags())) {
|
||||
bool unicode = IsEitherUnicode(compiler->flags());
|
||||
if (IsIgnoreCase(flags_)) {
|
||||
bool unicode = IsEitherUnicode(flags_);
|
||||
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
|
||||
unicode, trace->backtrack());
|
||||
} else {
|
||||
|
@ -3500,7 +3485,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||
if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
|
||||
|
||||
// Check that the back reference does not end inside a surrogate pair.
|
||||
if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
|
||||
if (IsEitherUnicode(flags_) && !compiler->one_byte()) {
|
||||
assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
|
||||
}
|
||||
on_success()->Emit(compiler, trace);
|
||||
|
@ -3722,7 +3707,7 @@ class Analysis : public NodeVisitor {
|
|||
} while (false)
|
||||
|
||||
void VisitText(TextNode* that) override {
|
||||
that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
|
||||
that->MakeCaseIndependent(isolate(), is_one_byte_, flags_);
|
||||
EnsureAnalyzed(that->on_success());
|
||||
if (has_failed()) return;
|
||||
that->CalculateOffsets();
|
||||
|
@ -3730,9 +3715,6 @@ class Analysis : public NodeVisitor {
|
|||
}
|
||||
|
||||
void VisitAction(ActionNode* that) override {
|
||||
if (that->action_type() == ActionNode::MODIFY_FLAGS) {
|
||||
set_flags(that->flags());
|
||||
}
|
||||
EnsureAnalyzed(that->on_success());
|
||||
if (has_failed()) return;
|
||||
STATIC_FOR_EACH(Propagators::VisitAction(that));
|
||||
|
@ -3791,12 +3773,9 @@ class Analysis : public NodeVisitor {
|
|||
#undef STATIC_FOR_EACH
|
||||
|
||||
private:
|
||||
RegExpFlags flags() const { return flags_; }
|
||||
void set_flags(RegExpFlags flags) { flags_ = flags; }
|
||||
|
||||
Isolate* isolate_;
|
||||
const bool is_one_byte_;
|
||||
RegExpFlags flags_;
|
||||
const RegExpFlags flags_;
|
||||
RegExpError error_;
|
||||
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
|
||||
|
@ -3924,12 +3903,13 @@ RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
|
|||
}
|
||||
|
||||
RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
|
||||
RegExpFlags flags,
|
||||
bool is_one_byte) {
|
||||
// Wrap the body of the regexp in capture #0.
|
||||
RegExpNode* captured_body =
|
||||
RegExpCapture::ToNode(data->tree, 0, this, accept());
|
||||
RegExpNode* node = captured_body;
|
||||
if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
|
||||
if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) {
|
||||
// Add a .*? at the beginning, outside the body capture, unless
|
||||
// this expression is anchored at the beginning or sticky.
|
||||
RegExpNode* loop_node = RegExpQuantifier::ToNode(
|
||||
|
@ -3951,14 +3931,13 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
|
|||
}
|
||||
}
|
||||
if (is_one_byte) {
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
|
||||
// Do it again to propagate the new nodes to places where they were not
|
||||
// put because they had not been calculated yet.
|
||||
if (node != nullptr) {
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
|
||||
}
|
||||
} else if (IsEitherUnicode(flags()) &&
|
||||
(IsGlobal(flags()) || IsSticky(flags()))) {
|
||||
} else if (IsEitherUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) {
|
||||
node = OptionallyStepBackToLeadSurrogate(node);
|
||||
}
|
||||
|
||||
|
|
|
@ -501,7 +501,8 @@ class RegExpCompiler {
|
|||
// - Inserting the implicit .* before/after the regexp if necessary.
|
||||
// - If the input is a one-byte string, filtering out nodes that can't match.
|
||||
// - Fixing up regexp matches that start within a surrogate pair.
|
||||
RegExpNode* PreprocessRegExp(RegExpCompileData* data, bool is_one_byte);
|
||||
RegExpNode* PreprocessRegExp(RegExpCompileData* data, RegExpFlags flags,
|
||||
bool is_one_byte);
|
||||
|
||||
// If the regexp matching starts within a surrogate pair, step back to the
|
||||
// lead surrogate and start matching from there.
|
||||
|
@ -526,8 +527,7 @@ class RegExpCompiler {
|
|||
inline void IncrementRecursionDepth() { recursion_depth_++; }
|
||||
inline void DecrementRecursionDepth() { recursion_depth_--; }
|
||||
|
||||
inline RegExpFlags flags() const { return flags_; }
|
||||
inline void set_flags(RegExpFlags flags) { flags_ = flags; }
|
||||
RegExpFlags flags() const { return flags_; }
|
||||
|
||||
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
|
||||
|
||||
|
@ -571,7 +571,7 @@ class RegExpCompiler {
|
|||
int unicode_lookaround_position_register_;
|
||||
ZoneVector<RegExpNode*>* work_list_;
|
||||
int recursion_depth_;
|
||||
RegExpFlags flags_;
|
||||
const RegExpFlags flags_;
|
||||
RegExpMacroAssembler* macro_assembler_;
|
||||
bool one_byte_;
|
||||
bool reg_exp_too_big_;
|
||||
|
|
|
@ -231,10 +231,6 @@ void DotPrinterImpl::VisitAction(ActionNode* that) {
|
|||
<< "\", shape=septagon";
|
||||
break;
|
||||
}
|
||||
case ActionNode::MODIFY_FLAGS: {
|
||||
os_ << "label=\"flags $" << that->flags() << "\", shape=septagon";
|
||||
break;
|
||||
}
|
||||
}
|
||||
os_ << "];\n";
|
||||
PrintAttributes(that);
|
||||
|
|
|
@ -88,7 +88,8 @@ int32_t Load32Aligned(const uint8_t* pc) {
|
|||
return *reinterpret_cast<const int32_t*>(pc);
|
||||
}
|
||||
|
||||
uint32_t Load16AlignedUnsigned(const uint8_t* pc) {
|
||||
// TODO(jgruber): Rename to Load16AlignedUnsigned.
|
||||
uint32_t Load16Aligned(const uint8_t* pc) {
|
||||
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
|
||||
return *reinterpret_cast<const uint16_t*>(pc);
|
||||
}
|
||||
|
@ -220,17 +221,17 @@ IrregexpInterpreter::Result MaybeThrowStackOverflow(
|
|||
template <typename Char>
|
||||
void UpdateCodeAndSubjectReferences(
|
||||
Isolate* isolate, Handle<ByteArray> code_array,
|
||||
Handle<String> subject_string, Tagged<ByteArray>* code_array_out,
|
||||
Handle<String> subject_string, ByteArray* code_array_out,
|
||||
const uint8_t** code_base_out, const uint8_t** pc_out,
|
||||
Tagged<String>* subject_string_out,
|
||||
String* subject_string_out,
|
||||
base::Vector<const Char>* subject_string_vector_out) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
|
||||
if (*code_base_out != code_array->begin()) {
|
||||
if (*code_base_out != code_array->GetDataStartAddress()) {
|
||||
*code_array_out = *code_array;
|
||||
const intptr_t pc_offset = *pc_out - *code_base_out;
|
||||
DCHECK_GT(pc_offset, 0);
|
||||
*code_base_out = code_array->begin();
|
||||
*code_base_out = code_array->GetDataStartAddress();
|
||||
*pc_out = *code_base_out + pc_offset;
|
||||
}
|
||||
|
||||
|
@ -243,9 +244,8 @@ void UpdateCodeAndSubjectReferences(
|
|||
// necessary.
|
||||
template <typename Char>
|
||||
IrregexpInterpreter::Result HandleInterrupts(
|
||||
Isolate* isolate, RegExp::CallOrigin call_origin,
|
||||
Tagged<ByteArray>* code_array_out, Tagged<String>* subject_string_out,
|
||||
const uint8_t** code_base_out,
|
||||
Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
|
||||
String* subject_string_out, const uint8_t** code_base_out,
|
||||
base::Vector<const Char>* subject_string_vector_out,
|
||||
const uint8_t** pc_out) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
|
@ -276,12 +276,12 @@ IrregexpInterpreter::Result HandleInterrupts(
|
|||
} else if (check.InterruptRequested()) {
|
||||
const bool was_one_byte =
|
||||
String::IsOneByteRepresentationUnderneath(*subject_string_out);
|
||||
Tagged<Object> result;
|
||||
Object result;
|
||||
{
|
||||
AllowGarbageCollection yes_gc;
|
||||
result = isolate->stack_guard()->HandleInterrupts();
|
||||
}
|
||||
if (IsException(result, isolate)) {
|
||||
if (result.IsException(isolate)) {
|
||||
return IrregexpInterpreter::EXCEPTION;
|
||||
}
|
||||
|
||||
|
@ -375,10 +375,10 @@ bool IndexIsInBounds(int index, int length) {
|
|||
|
||||
template <typename Char>
|
||||
IrregexpInterpreter::Result RawMatch(
|
||||
Isolate* isolate, Tagged<ByteArray> code_array,
|
||||
Tagged<String> subject_string, base::Vector<const Char> subject,
|
||||
int* output_registers, int output_register_count, int total_register_count,
|
||||
int current, uint32_t current_char, RegExp::CallOrigin call_origin,
|
||||
Isolate* isolate, ByteArray code_array, String subject_string,
|
||||
base::Vector<const Char> subject, int* output_registers,
|
||||
int output_register_count, int total_register_count, int current,
|
||||
uint32_t current_char, RegExp::CallOrigin call_origin,
|
||||
const uint32_t backtrack_limit) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
|
||||
|
@ -430,7 +430,7 @@ IrregexpInterpreter::Result RawMatch(
|
|||
|
||||
#endif // V8_USE_COMPUTED_GOTO
|
||||
|
||||
const uint8_t* pc = code_array->begin();
|
||||
const uint8_t* pc = code_array.GetDataStartAddress();
|
||||
const uint8_t* code_base = pc;
|
||||
|
||||
InterpreterRegisters registers(total_register_count, output_registers,
|
||||
|
@ -702,8 +702,8 @@ IrregexpInterpreter::Result RawMatch(
|
|||
}
|
||||
BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
|
||||
uint32_t c = LoadPacked24Unsigned(insn);
|
||||
uint32_t minus = Load16AlignedUnsigned(pc + 4);
|
||||
uint32_t mask = Load16AlignedUnsigned(pc + 6);
|
||||
uint32_t minus = Load16Aligned(pc + 4);
|
||||
uint32_t mask = Load16Aligned(pc + 6);
|
||||
if (c != ((current_char - minus) & mask)) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
|
||||
} else {
|
||||
|
@ -712,8 +712,8 @@ IrregexpInterpreter::Result RawMatch(
|
|||
DISPATCH();
|
||||
}
|
||||
BYTECODE(CHECK_CHAR_IN_RANGE) {
|
||||
uint32_t from = Load16AlignedUnsigned(pc + 4);
|
||||
uint32_t to = Load16AlignedUnsigned(pc + 6);
|
||||
uint32_t from = Load16Aligned(pc + 4);
|
||||
uint32_t to = Load16Aligned(pc + 6);
|
||||
if (from <= current_char && current_char <= to) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
|
||||
} else {
|
||||
|
@ -722,8 +722,8 @@ IrregexpInterpreter::Result RawMatch(
|
|||
DISPATCH();
|
||||
}
|
||||
BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
|
||||
uint32_t from = Load16AlignedUnsigned(pc + 4);
|
||||
uint32_t to = Load16AlignedUnsigned(pc + 6);
|
||||
uint32_t from = Load16Aligned(pc + 4);
|
||||
uint32_t to = Load16Aligned(pc + 6);
|
||||
if (from > current_char || current_char > to) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
|
||||
} else {
|
||||
|
@ -914,7 +914,7 @@ IrregexpInterpreter::Result RawMatch(
|
|||
BYTECODE(SKIP_UNTIL_CHAR) {
|
||||
int32_t load_offset = LoadPacked24Signed(insn);
|
||||
int32_t advance = Load16AlignedSigned(pc + 4);
|
||||
uint32_t c = Load16AlignedUnsigned(pc + 6);
|
||||
uint32_t c = Load16Aligned(pc + 6);
|
||||
while (IndexIsInBounds(current + load_offset, subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (c == current_char) {
|
||||
|
@ -929,7 +929,7 @@ IrregexpInterpreter::Result RawMatch(
|
|||
BYTECODE(SKIP_UNTIL_CHAR_AND) {
|
||||
int32_t load_offset = LoadPacked24Signed(insn);
|
||||
int32_t advance = Load16AlignedSigned(pc + 4);
|
||||
uint16_t c = Load16AlignedUnsigned(pc + 6);
|
||||
uint16_t c = Load16Aligned(pc + 6);
|
||||
uint32_t mask = Load32Aligned(pc + 8);
|
||||
int32_t maximum_offset = Load32Aligned(pc + 12);
|
||||
while (static_cast<uintptr_t>(current + maximum_offset) <=
|
||||
|
@ -947,7 +947,7 @@ IrregexpInterpreter::Result RawMatch(
|
|||
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
|
||||
int32_t load_offset = LoadPacked24Signed(insn);
|
||||
int32_t advance = Load16AlignedSigned(pc + 4);
|
||||
uint16_t c = Load16AlignedUnsigned(pc + 6);
|
||||
uint16_t c = Load16Aligned(pc + 6);
|
||||
int32_t maximum_offset = Load32Aligned(pc + 8);
|
||||
while (static_cast<uintptr_t>(current + maximum_offset) <=
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
|
@ -979,7 +979,7 @@ IrregexpInterpreter::Result RawMatch(
|
|||
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
|
||||
int32_t load_offset = LoadPacked24Signed(insn);
|
||||
int32_t advance = Load16AlignedSigned(pc + 4);
|
||||
uint16_t limit = Load16AlignedUnsigned(pc + 6);
|
||||
uint16_t limit = Load16Aligned(pc + 6);
|
||||
const uint8_t* table = pc + 8;
|
||||
while (IndexIsInBounds(current + load_offset, subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
|
@ -999,8 +999,8 @@ IrregexpInterpreter::Result RawMatch(
|
|||
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
|
||||
int32_t load_offset = LoadPacked24Signed(insn);
|
||||
int32_t advance = Load32Aligned(pc + 4);
|
||||
uint16_t c = Load16AlignedUnsigned(pc + 8);
|
||||
uint16_t c2 = Load16AlignedUnsigned(pc + 10);
|
||||
uint16_t c = Load16Aligned(pc + 8);
|
||||
uint16_t c2 = Load16Aligned(pc + 10);
|
||||
while (IndexIsInBounds(current + load_offset, subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
// The two if-statements below are split up intentionally, as combining
|
||||
|
@ -1047,29 +1047,29 @@ IrregexpInterpreter::Result RawMatch(
|
|||
|
||||
// static
|
||||
IrregexpInterpreter::Result IrregexpInterpreter::Match(
|
||||
Isolate* isolate, Tagged<JSRegExp> regexp, Tagged<String> subject_string,
|
||||
Isolate* isolate, JSRegExp regexp, String subject_string,
|
||||
int* output_registers, int output_register_count, int start_position,
|
||||
RegExp::CallOrigin call_origin) {
|
||||
if (v8_flags.regexp_tier_up) regexp->TierUpTick();
|
||||
if (v8_flags.regexp_tier_up) regexp.TierUpTick();
|
||||
|
||||
bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
|
||||
Tagged<ByteArray> code_array = ByteArray::cast(regexp->bytecode(is_one_byte));
|
||||
int total_register_count = regexp->max_register_count();
|
||||
ByteArray code_array = ByteArray::cast(regexp.bytecode(is_one_byte));
|
||||
int total_register_count = regexp.max_register_count();
|
||||
|
||||
return MatchInternal(isolate, code_array, subject_string, output_registers,
|
||||
output_register_count, total_register_count,
|
||||
start_position, call_origin, regexp->backtrack_limit());
|
||||
start_position, call_origin, regexp.backtrack_limit());
|
||||
}
|
||||
|
||||
IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
|
||||
Isolate* isolate, Tagged<ByteArray> code_array,
|
||||
Tagged<String> subject_string, int* output_registers,
|
||||
int output_register_count, int total_register_count, int start_position,
|
||||
RegExp::CallOrigin call_origin, uint32_t backtrack_limit) {
|
||||
DCHECK(subject_string->IsFlat());
|
||||
Isolate* isolate, ByteArray code_array, String subject_string,
|
||||
int* output_registers, int output_register_count, int total_register_count,
|
||||
int start_position, RegExp::CallOrigin call_origin,
|
||||
uint32_t backtrack_limit) {
|
||||
DCHECK(subject_string.IsFlat());
|
||||
|
||||
// TODO(chromium:1262676): Remove this CHECK once fixed.
|
||||
CHECK(IsByteArray(code_array));
|
||||
CHECK(code_array.IsByteArray());
|
||||
|
||||
// Note: Heap allocation *is* allowed in two situations if calling from
|
||||
// Runtime:
|
||||
|
@ -1080,7 +1080,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
|
|||
DisallowGarbageCollection no_gc;
|
||||
|
||||
base::uc16 previous_char = '\n';
|
||||
String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
|
||||
String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
|
||||
// Because interrupts can result in GC and string content relocation, the
|
||||
// checksum verification in FlatContent may fail even though this code is
|
||||
// safe. See (2) above.
|
||||
|
@ -1122,10 +1122,10 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
|
|||
DisallowHandleAllocation no_handles;
|
||||
DisallowHandleDereference no_deref;
|
||||
|
||||
Tagged<String> subject_string = String::cast(Tagged<Object>(subject));
|
||||
Tagged<JSRegExp> regexp_obj = JSRegExp::cast(Tagged<Object>(regexp));
|
||||
String subject_string = String::cast(Object(subject));
|
||||
JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
|
||||
|
||||
if (regexp_obj->MarkedForTierUp()) {
|
||||
if (regexp_obj.MarkedForTierUp()) {
|
||||
// Returning RETRY will re-enter through runtime, where actual recompilation
|
||||
// for tier-up takes place.
|
||||
return IrregexpInterpreter::RETRY;
|
||||
|
|
|
@ -49,18 +49,17 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
|
|||
RegExp::CallOrigin call_origin,
|
||||
Isolate* isolate, Address regexp);
|
||||
|
||||
static Result MatchInternal(Isolate* isolate, Tagged<ByteArray> code_array,
|
||||
Tagged<String> subject_string,
|
||||
int* output_registers, int output_register_count,
|
||||
static Result MatchInternal(Isolate* isolate, ByteArray code_array,
|
||||
String subject_string, int* output_registers,
|
||||
int output_register_count,
|
||||
int total_register_count, int start_position,
|
||||
RegExp::CallOrigin call_origin,
|
||||
uint32_t backtrack_limit);
|
||||
|
||||
private:
|
||||
static Result Match(Isolate* isolate, Tagged<JSRegExp> regexp,
|
||||
Tagged<String> subject_string, int* output_registers,
|
||||
int output_register_count, int start_position,
|
||||
RegExp::CallOrigin call_origin);
|
||||
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
|
||||
int* output_registers, int output_register_count,
|
||||
int start_position, RegExp::CallOrigin call_origin);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
|
|
@ -182,25 +182,24 @@ uint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
|
|||
static constexpr uint32_t kTrue = 1;
|
||||
static constexpr uint32_t kFalse = 0;
|
||||
|
||||
Tagged<FixedUInt16Array> ranges =
|
||||
FixedUInt16Array::cast(Tagged<Object>(raw_byte_array));
|
||||
DCHECK_GE(ranges->length(), 1);
|
||||
FixedUInt16Array ranges = FixedUInt16Array::cast(Object(raw_byte_array));
|
||||
DCHECK_GE(ranges.length(), 1);
|
||||
|
||||
// Shortcut for fully out of range chars.
|
||||
if (current_char < ranges->get(0)) return kFalse;
|
||||
if (current_char >= ranges->get(ranges->length() - 1)) {
|
||||
if (current_char < ranges.get(0)) return kFalse;
|
||||
if (current_char >= ranges.get(ranges.length() - 1)) {
|
||||
// The last range may be open-ended.
|
||||
return (ranges->length() % 2) == 0 ? kFalse : kTrue;
|
||||
return (ranges.length() % 2) == 0 ? kFalse : kTrue;
|
||||
}
|
||||
|
||||
// Binary search for the matching range. `ranges` is encoded as
|
||||
// [from0, to0, from1, to1, ..., fromN, toN], or
|
||||
// [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
|
||||
|
||||
int mid, lower = 0, upper = ranges->length();
|
||||
int mid, lower = 0, upper = ranges.length();
|
||||
do {
|
||||
mid = lower + (upper - lower) / 2;
|
||||
const base::uc16 elem = ranges->get(mid);
|
||||
const base::uc16 elem = ranges.get(mid);
|
||||
if (current_char < elem) {
|
||||
upper = mid;
|
||||
} else if (current_char > elem) {
|
||||
|
@ -211,7 +210,7 @@ uint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
|
|||
}
|
||||
} while (lower < upper);
|
||||
|
||||
const bool current_char_ge_last_elem = current_char >= ranges->get(mid);
|
||||
const bool current_char_ge_last_elem = current_char >= ranges.get(mid);
|
||||
const int current_range_start_index =
|
||||
current_char_ge_last_elem ? mid : mid - 1;
|
||||
|
||||
|
@ -278,16 +277,15 @@ bool NativeRegExpMacroAssembler::CanReadUnaligned() const {
|
|||
// static
|
||||
int NativeRegExpMacroAssembler::CheckStackGuardState(
|
||||
Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
|
||||
Address* return_address, Tagged<InstructionStream> re_code,
|
||||
Address* subject, const uint8_t** input_start, const uint8_t** input_end,
|
||||
uintptr_t gap) {
|
||||
Address* return_address, InstructionStream re_code, Address* subject,
|
||||
const uint8_t** input_start, const uint8_t** input_end) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
Address old_pc = PointerAuthentication::AuthenticatePC(return_address, 0);
|
||||
DCHECK_LE(re_code->instruction_start(), old_pc);
|
||||
DCHECK_LE(old_pc, re_code->code(kAcquireLoad)->instruction_end());
|
||||
DCHECK_LE(re_code.instruction_start(), old_pc);
|
||||
DCHECK_LE(old_pc, re_code.code(kAcquireLoad).instruction_end());
|
||||
|
||||
StackLimitCheck check(isolate);
|
||||
bool js_has_overflowed = check.JsHasOverflowed(gap);
|
||||
bool js_has_overflowed = check.JsHasOverflowed();
|
||||
|
||||
if (call_origin == RegExp::CallOrigin::kFromJs) {
|
||||
// Direct calls from JavaScript can be interrupted in two ways:
|
||||
|
@ -312,8 +310,7 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
|
|||
// Prepare for possible GC.
|
||||
HandleScope handles(isolate);
|
||||
Handle<InstructionStream> code_handle(re_code, isolate);
|
||||
Handle<String> subject_handle(String::cast(Tagged<Object>(*subject)),
|
||||
isolate);
|
||||
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
|
||||
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
|
||||
int return_value = 0;
|
||||
|
||||
|
@ -325,8 +322,8 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
|
|||
return_value = EXCEPTION;
|
||||
} else if (check.InterruptRequested()) {
|
||||
AllowGarbageCollection yes_gc;
|
||||
Tagged<Object> result = isolate->stack_guard()->HandleInterrupts();
|
||||
if (IsException(result, isolate)) return_value = EXCEPTION;
|
||||
Object result = isolate->stack_guard()->HandleInterrupts();
|
||||
if (result.IsException(isolate)) return_value = EXCEPTION;
|
||||
}
|
||||
|
||||
// We are not using operator == here because it does a slow DCHECK
|
||||
|
@ -374,34 +371,34 @@ int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
|
|||
// DisallowGarbageCollection, since regexps might be preempted, and another
|
||||
// thread might do allocation anyway.
|
||||
|
||||
Tagged<String> subject_ptr = *subject;
|
||||
String subject_ptr = *subject;
|
||||
// Character offsets into string.
|
||||
int start_offset = previous_index;
|
||||
int char_length = subject_ptr->length() - start_offset;
|
||||
int char_length = subject_ptr.length() - start_offset;
|
||||
int slice_offset = 0;
|
||||
|
||||
// The string has been flattened, so if it is a cons string it contains the
|
||||
// full string in the first part.
|
||||
if (StringShape(subject_ptr).IsCons()) {
|
||||
DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
|
||||
subject_ptr = ConsString::cast(subject_ptr)->first();
|
||||
DCHECK_EQ(0, ConsString::cast(subject_ptr).second().length());
|
||||
subject_ptr = ConsString::cast(subject_ptr).first();
|
||||
} else if (StringShape(subject_ptr).IsSliced()) {
|
||||
Tagged<SlicedString> slice = SlicedString::cast(subject_ptr);
|
||||
subject_ptr = slice->parent();
|
||||
slice_offset = slice->offset();
|
||||
SlicedString slice = SlicedString::cast(subject_ptr);
|
||||
subject_ptr = slice.parent();
|
||||
slice_offset = slice.offset();
|
||||
}
|
||||
if (StringShape(subject_ptr).IsThin()) {
|
||||
subject_ptr = ThinString::cast(subject_ptr)->actual();
|
||||
subject_ptr = ThinString::cast(subject_ptr).actual();
|
||||
}
|
||||
// Ensure that an underlying string has the same representation.
|
||||
bool is_one_byte = subject_ptr->IsOneByteRepresentation();
|
||||
DCHECK(IsExternalString(subject_ptr) || IsSeqString(subject_ptr));
|
||||
bool is_one_byte = subject_ptr.IsOneByteRepresentation();
|
||||
DCHECK(subject_ptr.IsExternalString() || subject_ptr.IsSeqString());
|
||||
// String is now either Sequential or External
|
||||
int char_size_shift = is_one_byte ? 0 : 1;
|
||||
|
||||
DisallowGarbageCollection no_gc;
|
||||
const uint8_t* input_start =
|
||||
subject_ptr->AddressOfCharacterAt(start_offset + slice_offset, no_gc);
|
||||
subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc);
|
||||
int byte_length = char_length << char_size_shift;
|
||||
const uint8_t* input_end = input_start + byte_length;
|
||||
return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
|
||||
|
@ -410,9 +407,9 @@ int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
|
|||
|
||||
// static
|
||||
int NativeRegExpMacroAssembler::ExecuteForTesting(
|
||||
Tagged<String> input, int start_offset, const uint8_t* input_start,
|
||||
String input, int start_offset, const uint8_t* input_start,
|
||||
const uint8_t* input_end, int* output, int output_size, Isolate* isolate,
|
||||
Tagged<JSRegExp> regexp) {
|
||||
JSRegExp regexp) {
|
||||
return Execute(input, start_offset, input_start, input_end, output,
|
||||
output_size, isolate, regexp);
|
||||
}
|
||||
|
@ -422,14 +419,13 @@ int NativeRegExpMacroAssembler::ExecuteForTesting(
|
|||
// the signature of the interpreter. We should get rid of JS objects passed to
|
||||
// internal methods.
|
||||
int NativeRegExpMacroAssembler::Execute(
|
||||
Tagged<String>
|
||||
input, // This needs to be the unpacked (sliced, cons) string.
|
||||
String input, // This needs to be the unpacked (sliced, cons) string.
|
||||
int start_offset, const uint8_t* input_start, const uint8_t* input_end,
|
||||
int* output, int output_size, Isolate* isolate, Tagged<JSRegExp> regexp) {
|
||||
int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
|
||||
RegExpStackScope stack_scope(isolate);
|
||||
|
||||
bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
|
||||
Tagged<Code> code = Code::cast(regexp->code(isolate, is_one_byte));
|
||||
Code code = Code::cast(regexp.code(is_one_byte));
|
||||
RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
|
||||
|
||||
using RegexpMatcherSig =
|
||||
|
@ -443,7 +439,7 @@ int NativeRegExpMacroAssembler::Execute(
|
|||
output, output_size, call_origin, isolate, regexp.ptr());
|
||||
DCHECK_GE(result, SMALLEST_REGEXP_RESULT);
|
||||
|
||||
if (result == EXCEPTION && !isolate->has_exception()) {
|
||||
if (result == EXCEPTION && !isolate->has_pending_exception()) {
|
||||
// We detected a stack overflow (on the backtrack stack) in RegExp code,
|
||||
// but haven't created the exception yet. Additionally, we allow heap
|
||||
// allocation because even though it invalidates {input_start} and
|
||||
|
|
|
@ -301,10 +301,12 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
|
|||
int* offsets_vector, int offsets_vector_length,
|
||||
int previous_index, Isolate* isolate);
|
||||
|
||||
V8_EXPORT_PRIVATE static int ExecuteForTesting(
|
||||
Tagged<String> input, int start_offset, const uint8_t* input_start,
|
||||
const uint8_t* input_end, int* output, int output_size, Isolate* isolate,
|
||||
Tagged<JSRegExp> regexp);
|
||||
V8_EXPORT_PRIVATE static int ExecuteForTesting(String input, int start_offset,
|
||||
const uint8_t* input_start,
|
||||
const uint8_t* input_end,
|
||||
int* output, int output_size,
|
||||
Isolate* isolate,
|
||||
JSRegExp regexp);
|
||||
|
||||
bool CanReadUnaligned() const override;
|
||||
|
||||
|
@ -328,9 +330,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
|
|||
static int CheckStackGuardState(Isolate* isolate, int start_index,
|
||||
RegExp::CallOrigin call_origin,
|
||||
Address* return_address,
|
||||
Tagged<InstructionStream> re_code,
|
||||
Address* subject, const uint8_t** input_start,
|
||||
const uint8_t** input_end, uintptr_t gap);
|
||||
InstructionStream re_code, Address* subject,
|
||||
const uint8_t** input_start,
|
||||
const uint8_t** input_end);
|
||||
|
||||
static Address word_character_map_address() {
|
||||
return reinterpret_cast<Address>(&word_character_map[0]);
|
||||
|
@ -346,10 +348,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
|
|||
|
||||
private:
|
||||
// Returns a {Result} sentinel, or the number of successful matches.
|
||||
static int Execute(Tagged<String> input, int start_offset,
|
||||
const uint8_t* input_start, const uint8_t* input_end,
|
||||
int* output, int output_size, Isolate* isolate,
|
||||
Tagged<JSRegExp> regexp);
|
||||
static int Execute(String input, int start_offset, const uint8_t* input_start,
|
||||
const uint8_t* input_end, int* output, int output_size,
|
||||
Isolate* isolate, JSRegExp regexp);
|
||||
|
||||
ZoneUnorderedMap<uint32_t, Handle<FixedUInt16Array>> range_array_cache_;
|
||||
};
|
||||
|
|
|
@ -318,8 +318,7 @@ class ActionNode : public SeqRegExpNode {
|
|||
BEGIN_NEGATIVE_SUBMATCH,
|
||||
POSITIVE_SUBMATCH_SUCCESS,
|
||||
EMPTY_MATCH_CHECK,
|
||||
CLEAR_CAPTURES,
|
||||
MODIFY_FLAGS
|
||||
CLEAR_CAPTURES
|
||||
};
|
||||
static ActionNode* SetRegisterForLoop(int reg, int val,
|
||||
RegExpNode* on_success);
|
||||
|
@ -342,7 +341,6 @@ class ActionNode : public SeqRegExpNode {
|
|||
int repetition_register,
|
||||
int repetition_limit,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* ModifyFlags(RegExpFlags flags, RegExpNode* on_success);
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
|
@ -355,10 +353,6 @@ class ActionNode : public SeqRegExpNode {
|
|||
int GreedyLoopTextLength() override {
|
||||
return kNodeIsTooComplexForGreedyLoops;
|
||||
}
|
||||
RegExpFlags flags() {
|
||||
DCHECK_EQ(action_type(), MODIFY_FLAGS);
|
||||
return RegExpFlags{data_.u_modify_flags.flags};
|
||||
}
|
||||
|
||||
private:
|
||||
union {
|
||||
|
@ -388,13 +382,9 @@ class ActionNode : public SeqRegExpNode {
|
|||
int range_from;
|
||||
int range_to;
|
||||
} u_clear_captures;
|
||||
struct {
|
||||
int flags;
|
||||
} u_modify_flags;
|
||||
} data_;
|
||||
ActionNode(ActionType action_type, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success), action_type_(action_type) {}
|
||||
|
||||
ActionType action_type_;
|
||||
friend class DotPrinterImpl;
|
||||
friend Zone;
|
||||
|
@ -509,11 +499,12 @@ class AssertionNode : public SeqRegExpNode {
|
|||
|
||||
class BackReferenceNode : public SeqRegExpNode {
|
||||
public:
|
||||
BackReferenceNode(int start_reg, int end_reg, bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
BackReferenceNode(int start_reg, int end_reg, RegExpFlags flags,
|
||||
bool read_backward, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
start_reg_(start_reg),
|
||||
end_reg_(end_reg),
|
||||
flags_(flags),
|
||||
read_backward_(read_backward) {}
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
int start_register() { return start_reg_; }
|
||||
|
@ -531,6 +522,7 @@ class BackReferenceNode : public SeqRegExpNode {
|
|||
private:
|
||||
int start_reg_;
|
||||
int end_reg_;
|
||||
RegExpFlags flags_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
|
|
|
@ -67,7 +67,8 @@ class RegExpTextBuilder {
|
|||
bool ignore_case() const { return IsIgnoreCase(flags_); }
|
||||
bool IsUnicodeMode() const {
|
||||
// Either /v or /u enable UnicodeMode
|
||||
// https://tc39.es/ecma262/#sec-parsepattern
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
|
||||
return IsUnicode(flags_) || IsUnicodeSets(flags_);
|
||||
}
|
||||
Zone* zone() const { return zone_; }
|
||||
|
@ -263,7 +264,7 @@ RegExpTree* RegExpTextBuilder::PopLastAtom() {
|
|||
characters_ = nullptr;
|
||||
atom = zone()->New<RegExpAtom>(char_vector);
|
||||
return atom;
|
||||
} else if (!text_.empty()) {
|
||||
} else if (text_.size() > 0) {
|
||||
atom = text_.back();
|
||||
text_.pop_back();
|
||||
return atom;
|
||||
|
@ -314,7 +315,8 @@ class RegExpBuilder {
|
|||
void FlushTerms();
|
||||
bool IsUnicodeMode() const {
|
||||
// Either /v or /u enable UnicodeMode
|
||||
// https://tc39.es/ecma262/#sec-parsepattern
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
|
||||
return IsUnicode(flags_) || IsUnicodeSets(flags_);
|
||||
}
|
||||
Zone* zone() const { return zone_; }
|
||||
|
@ -461,22 +463,17 @@ class RegExpParserImpl final {
|
|||
RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
|
||||
ClassSetOperandType* type_out,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
CharacterClassStrings* strings,
|
||||
base::uc32* character);
|
||||
CharacterClassStrings* strings);
|
||||
base::uc32 ParseClassSetCharacter();
|
||||
// Parses and returns a single escaped character.
|
||||
base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state,
|
||||
bool* is_escaped_unicode_character);
|
||||
|
||||
void AddMaybeSimpleCaseFoldedRange(ZoneList<CharacterRange>* ranges,
|
||||
CharacterRange new_range);
|
||||
|
||||
RegExpTree* ParseClassUnion(const RegExpBuilder* builder, bool is_negated,
|
||||
RegExpTree* first_operand,
|
||||
ClassSetOperandType first_operand_type,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
CharacterClassStrings* strings,
|
||||
base::uc32 first_character);
|
||||
CharacterClassStrings* strings);
|
||||
RegExpTree* ParseClassIntersection(const RegExpBuilder* builder,
|
||||
bool is_negated, RegExpTree* first_operand,
|
||||
ClassSetOperandType first_operand_type);
|
||||
|
@ -507,10 +504,11 @@ class RegExpParserImpl final {
|
|||
int captures_started() const { return captures_started_; }
|
||||
int position() const { return next_pos_ - 1; }
|
||||
bool failed() const { return failed_; }
|
||||
RegExpFlags flags() const { return flags_; }
|
||||
RegExpFlags flags() const { return top_level_flags_; }
|
||||
bool IsUnicodeMode() const {
|
||||
// Either /v or /u enable UnicodeMode
|
||||
// https://tc39.es/ecma262/#sec-parsepattern
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
|
||||
return IsUnicode(flags()) || IsUnicodeSets(flags()) || force_unicode_;
|
||||
}
|
||||
bool unicode_sets() const { return IsUnicodeSets(flags()); }
|
||||
|
@ -597,11 +595,10 @@ class RegExpParserImpl final {
|
|||
ZoneList<RegExpCapture*>* captures_;
|
||||
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
|
||||
ZoneList<RegExpBackReference*>* named_back_references_;
|
||||
ZoneList<CharacterRange>* temp_ranges_;
|
||||
const CharT* const input_;
|
||||
const int input_length_;
|
||||
base::uc32 current_;
|
||||
RegExpFlags flags_;
|
||||
const RegExpFlags top_level_flags_;
|
||||
bool force_unicode_ = false; // Force parser to act as if unicode were set.
|
||||
int next_pos_;
|
||||
int captures_started_;
|
||||
|
@ -628,7 +625,7 @@ RegExpParserImpl<CharT>::RegExpParserImpl(
|
|||
input_(input),
|
||||
input_length_(input_length),
|
||||
current_(kEndMarker),
|
||||
flags_(flags),
|
||||
top_level_flags_(flags),
|
||||
next_pos_(0),
|
||||
captures_started_(0),
|
||||
capture_count_(0),
|
||||
|
@ -919,7 +916,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
|
|||
capture->set_body(body);
|
||||
body = capture;
|
||||
} else if (group_type == GROUPING) {
|
||||
body = zone()->template New<RegExpGroup>(body, builder->flags());
|
||||
body = zone()->template New<RegExpGroup>(body);
|
||||
} else {
|
||||
DCHECK(group_type == POSITIVE_LOOKAROUND ||
|
||||
group_type == NEGATIVE_LOOKAROUND);
|
||||
|
@ -987,7 +984,6 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
|
|||
case '(': {
|
||||
state = ParseOpenParenthesis(state CHECK_FAILED);
|
||||
builder = state->builder();
|
||||
flags_ = builder->flags();
|
||||
continue;
|
||||
}
|
||||
case '[': {
|
||||
|
@ -1041,8 +1037,8 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
|
|||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpCapture* capture = GetCapture(index);
|
||||
RegExpTree* atom =
|
||||
zone()->template New<RegExpBackReference>(capture);
|
||||
RegExpTree* atom = zone()->template New<RegExpBackReference>(
|
||||
capture, builder->flags());
|
||||
builder->AddAtom(atom);
|
||||
}
|
||||
break;
|
||||
|
@ -1250,91 +1246,43 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
|
|||
bool is_named_capture = false;
|
||||
const ZoneVector<base::uc16>* capture_name = nullptr;
|
||||
SubexpressionType subexpr_type = CAPTURE;
|
||||
RegExpFlags flags = state->builder()->flags();
|
||||
bool parsing_modifiers = false;
|
||||
bool modifiers_polarity = true;
|
||||
RegExpFlags modifiers;
|
||||
Advance();
|
||||
if (current() == '?') {
|
||||
do {
|
||||
switch (Next()) {
|
||||
case '-':
|
||||
if (!v8_flags.js_regexp_modifiers) {
|
||||
ReportError(RegExpError::kInvalidGroup);
|
||||
return nullptr;
|
||||
}
|
||||
Advance();
|
||||
parsing_modifiers = true;
|
||||
if (modifiers_polarity == false) {
|
||||
ReportError(RegExpError::kMultipleFlagDashes);
|
||||
return nullptr;
|
||||
}
|
||||
modifiers_polarity = false;
|
||||
break;
|
||||
case 'm':
|
||||
case 'i':
|
||||
case 's': {
|
||||
if (!v8_flags.js_regexp_modifiers) {
|
||||
ReportError(RegExpError::kInvalidGroup);
|
||||
return nullptr;
|
||||
}
|
||||
Advance();
|
||||
parsing_modifiers = true;
|
||||
RegExpFlag flag = TryRegExpFlagFromChar(current()).value();
|
||||
if ((modifiers & flag) != 0) {
|
||||
ReportError(RegExpError::kRepeatedFlag);
|
||||
return nullptr;
|
||||
}
|
||||
modifiers |= flag;
|
||||
flags.set(flag, modifiers_polarity);
|
||||
break;
|
||||
}
|
||||
case ':':
|
||||
switch (Next()) {
|
||||
case ':':
|
||||
Advance(2);
|
||||
subexpr_type = GROUPING;
|
||||
break;
|
||||
case '=':
|
||||
Advance(2);
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
break;
|
||||
case '!':
|
||||
Advance(2);
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
break;
|
||||
case '<':
|
||||
Advance();
|
||||
if (Next() == '=') {
|
||||
Advance(2);
|
||||
parsing_modifiers = false;
|
||||
subexpr_type = GROUPING;
|
||||
break;
|
||||
case '=':
|
||||
Advance(2);
|
||||
parsing_modifiers = false;
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
break;
|
||||
case '!':
|
||||
} else if (Next() == '!') {
|
||||
Advance(2);
|
||||
parsing_modifiers = false;
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
break;
|
||||
case '<':
|
||||
Advance();
|
||||
parsing_modifiers = false;
|
||||
if (Next() == '=') {
|
||||
Advance(2);
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
break;
|
||||
} else if (Next() == '!') {
|
||||
Advance(2);
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
break;
|
||||
}
|
||||
is_named_capture = true;
|
||||
has_named_captures_ = true;
|
||||
Advance();
|
||||
break;
|
||||
default:
|
||||
ReportError(RegExpError::kInvalidGroup);
|
||||
return nullptr;
|
||||
}
|
||||
} while (parsing_modifiers);
|
||||
}
|
||||
if (modifiers_polarity == false) {
|
||||
// We encountered a dash.
|
||||
if (modifiers == 0) {
|
||||
ReportError(RegExpError::kInvalidFlagGroup);
|
||||
return nullptr;
|
||||
}
|
||||
is_named_capture = true;
|
||||
has_named_captures_ = true;
|
||||
Advance();
|
||||
break;
|
||||
default:
|
||||
ReportError(RegExpError::kInvalidGroup);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (subexpr_type == CAPTURE) {
|
||||
|
@ -1351,7 +1299,7 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
|
|||
// Store current state and begin new disjunction parsing.
|
||||
return zone()->template New<RegExpParserState>(
|
||||
state, subexpr_type, lookaround_type, captures_started_, capture_name,
|
||||
flags, zone());
|
||||
state->builder()->flags(), zone());
|
||||
}
|
||||
|
||||
// In order to know whether an escape is a backreference or not we have to scan
|
||||
|
@ -1609,7 +1557,8 @@ bool RegExpParserImpl<CharT>::ParseNamedBackReference(
|
|||
if (state->IsInsideCaptureGroup(name)) {
|
||||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpBackReference* atom = zone()->template New<RegExpBackReference>();
|
||||
RegExpBackReference* atom =
|
||||
zone()->template New<RegExpBackReference>(builder->flags());
|
||||
atom->set_name(name);
|
||||
|
||||
builder->AddAtom(atom);
|
||||
|
@ -1941,7 +1890,7 @@ bool LookupPropertyValueName(UProperty property,
|
|||
ExtractStringsFromUnicodeSet(set, result_strings, flags, zone);
|
||||
}
|
||||
const bool needs_case_folding = IsUnicodeSets(flags) && IsIgnoreCase(flags);
|
||||
if (needs_case_folding) set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
|
||||
if (needs_case_folding) CharacterRange::UnicodeSimpleCloseOver(set);
|
||||
set.removeAllStrings();
|
||||
if (negate) set.complement();
|
||||
for (int i = 0; i < set.getRangeCount(); i++) {
|
||||
|
@ -2147,22 +2096,13 @@ bool RegExpParserImpl<CharT>::AddPropertyClassRange(
|
|||
if (!IsSupportedBinaryProperty(property, unicode_sets())) return false;
|
||||
if (!IsExactPropertyAlias(name, property)) return false;
|
||||
// Negation of properties with strings is not allowed.
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// See
|
||||
// https://tc39.es/ecma262/#sec-static-semantics-maycontainstrings
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-static-semantics-maycontainstrings
|
||||
if (negate && IsBinaryPropertyOfStrings(property)) return false;
|
||||
if (unicode_sets()) {
|
||||
// In /v mode we can't simply look up the "false" binary property values,
|
||||
// as the spec requires us to perform case folding before calculating the
|
||||
// complement.
|
||||
// See https://tc39.es/ecma262/#sec-compiletocharset
|
||||
// UnicodePropertyValueExpression :: LoneUnicodePropertyNameOrValue
|
||||
return LookupPropertyValueName(property, "Y", negate, add_to_ranges,
|
||||
add_to_strings, flags(), zone());
|
||||
} else {
|
||||
return LookupPropertyValueName(property, negate ? "N" : "Y", false,
|
||||
add_to_ranges, add_to_strings, flags(),
|
||||
zone());
|
||||
}
|
||||
return LookupPropertyValueName(property, negate ? "N" : "Y", false,
|
||||
add_to_ranges, add_to_strings, flags(),
|
||||
zone());
|
||||
} else {
|
||||
// Both property name and value name are specified. Attempt to interpret
|
||||
// the property name as enumerated property.
|
||||
|
@ -2385,7 +2325,8 @@ base::uc32 RegExpParserImpl<CharT>::ParseCharacterEscape(
|
|||
return c;
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassRanges
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassRanges
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassRanges(
|
||||
ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents) {
|
||||
|
@ -2534,7 +2475,8 @@ void AddClassString(ZoneList<base::uc32>* normalized_string,
|
|||
|
||||
} // namespace
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassStringDisjunction
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassStringDisjunction
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
|
||||
ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
|
||||
|
@ -2584,7 +2526,8 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassSetOperand
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
|
||||
// Tree returned based on type_out:
|
||||
// * kNestedClass: RegExpClassSetExpression
|
||||
// * For all other types: RegExpClassSetOperand
|
||||
|
@ -2595,13 +2538,12 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
|
|||
zone()->template New<ZoneList<CharacterRange>>(1, zone());
|
||||
CharacterClassStrings* strings =
|
||||
zone()->template New<CharacterClassStrings>(zone());
|
||||
base::uc32 character;
|
||||
RegExpTree* tree = ParseClassSetOperand(builder, type_out, ranges, strings,
|
||||
&character CHECK_FAILED);
|
||||
RegExpTree* tree =
|
||||
ParseClassSetOperand(builder, type_out, ranges, strings CHECK_FAILED);
|
||||
DCHECK_IMPLIES(*type_out != ClassSetOperandType::kNestedClass,
|
||||
tree == nullptr);
|
||||
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
|
||||
ranges->is_empty());
|
||||
ranges->length() == 1);
|
||||
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
|
||||
strings->empty());
|
||||
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
|
||||
|
@ -2616,27 +2558,21 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
|
|||
// CharacterClassEscape includes \p{}, which can contain ranges, strings or
|
||||
// both and \P{}, which could contain nothing (i.e. \P{Any}).
|
||||
if (tree == nullptr) {
|
||||
if (*type_out == ClassSetOperandType::kClassSetCharacter) {
|
||||
AddMaybeSimpleCaseFoldedRange(ranges,
|
||||
CharacterRange::Singleton(character));
|
||||
}
|
||||
tree = zone()->template New<RegExpClassSetOperand>(ranges, strings);
|
||||
}
|
||||
return tree;
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassSetOperand
|
||||
// Based on |type_out| either a tree is returned or
|
||||
// |ranges|/|strings|/|character| modified. If a tree is returned,
|
||||
// ranges/strings are not modified. If |type_out| is kNestedClass, a tree of
|
||||
// type RegExpClassSetExpression is returned. If |type_out| is
|
||||
// kClassSetCharacter, |character| is set and nullptr returned. For all other
|
||||
// types, |ranges|/|strings|/|character| is modified and nullptr is returned.
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
|
||||
// Based on |type_out| either a tree is returned or ranges/strings modified.
|
||||
// If a tree is returned, ranges/strings are not modified.
|
||||
// If |type_out| is kNestedClass, a tree of type RegExpClassSetExpression is
|
||||
// returned. For all other types, ranges is modified and nullptr is returned.
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
|
||||
const RegExpBuilder* builder, ClassSetOperandType* type_out,
|
||||
ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings,
|
||||
base::uc32* character) {
|
||||
ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
|
||||
DCHECK(unicode_sets());
|
||||
base::uc32 c = current();
|
||||
if (c == '\\') {
|
||||
|
@ -2663,7 +2599,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
|
|||
|
||||
*type_out = ClassSetOperandType::kClassSetCharacter;
|
||||
c = ParseClassSetCharacter(CHECK_FAILED);
|
||||
*character = c;
|
||||
ranges->Add(CharacterRange::Singleton(c), zone());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -2717,28 +2653,13 @@ bool MayContainStrings(ClassSetOperandType type, RegExpTree* operand) {
|
|||
|
||||
} // namespace
|
||||
|
||||
template <class CharT>
|
||||
void RegExpParserImpl<CharT>::AddMaybeSimpleCaseFoldedRange(
|
||||
ZoneList<CharacterRange>* ranges, CharacterRange new_range) {
|
||||
DCHECK(unicode_sets());
|
||||
if (ignore_case()) {
|
||||
ZoneList<CharacterRange>* new_ranges =
|
||||
zone()->template New<ZoneList<CharacterRange>>(2, zone());
|
||||
new_ranges->Add(new_range, zone());
|
||||
CharacterRange::AddUnicodeCaseEquivalents(new_ranges, zone());
|
||||
ranges->AddAll(*new_ranges, zone());
|
||||
} else {
|
||||
ranges->Add(new_range, zone());
|
||||
}
|
||||
CharacterRange::Canonicalize(ranges);
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassUnion
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassUnion
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
|
||||
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
|
||||
ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges,
|
||||
CharacterClassStrings* strings, base::uc32 character) {
|
||||
CharacterClassStrings* strings) {
|
||||
DCHECK(unicode_sets());
|
||||
ZoneList<RegExpTree*>* operands =
|
||||
zone()->template New<ZoneList<RegExpTree*>>(2, zone());
|
||||
|
@ -2752,6 +2673,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
|
|||
operands->Add(first_operand, zone());
|
||||
}
|
||||
ClassSetOperandType last_type = first_operand_type;
|
||||
const bool needs_case_folding = ignore_case();
|
||||
while (has_more() && current() != ']') {
|
||||
if (current() == '-') {
|
||||
// Mix of ClassSetRange and ClassSubtraction is not allowed.
|
||||
|
@ -2768,36 +2690,42 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
|
|||
// represent a character range.
|
||||
// In case one of them is not a ClassSetCharacter, it is a syntax error,
|
||||
// as '-' can not be used unescaped within a class with /v.
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// See
|
||||
// https://tc39.es/ecma262/#prod-ClassSetRange
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetRange
|
||||
if (last_type != ClassSetOperandType::kClassSetCharacter) {
|
||||
return ReportError(RegExpError::kInvalidCharacterClass);
|
||||
}
|
||||
base::uc32 from = character;
|
||||
ParseClassSetOperand(builder, &last_type, ranges, strings,
|
||||
&character CHECK_FAILED);
|
||||
ParseClassSetOperand(builder, &last_type, ranges, strings CHECK_FAILED);
|
||||
if (last_type != ClassSetOperandType::kClassSetCharacter) {
|
||||
return ReportError(RegExpError::kInvalidCharacterClass);
|
||||
}
|
||||
if (from > character) {
|
||||
// Remove the last two singleton characters added to ranges, and combine
|
||||
// them into a range.
|
||||
auto rhs_ranges = ranges->RemoveLast();
|
||||
auto lhs_ranges = ranges->RemoveLast();
|
||||
DCHECK(lhs_ranges.IsSingleton());
|
||||
DCHECK(rhs_ranges.IsSingleton());
|
||||
base::uc32 from = lhs_ranges.from();
|
||||
base::uc32 to = rhs_ranges.from();
|
||||
if (from > to) {
|
||||
return ReportError(RegExpError::kOutOfOrderCharacterClass);
|
||||
}
|
||||
AddMaybeSimpleCaseFoldedRange(ranges,
|
||||
CharacterRange::Range(from, character));
|
||||
ranges->Add(CharacterRange::Range(from, to), zone());
|
||||
last_type = ClassSetOperandType::kClassSetRange;
|
||||
} else {
|
||||
DCHECK_NE(current(), '-');
|
||||
if (last_type == ClassSetOperandType::kClassSetCharacter) {
|
||||
AddMaybeSimpleCaseFoldedRange(ranges,
|
||||
CharacterRange::Singleton(character));
|
||||
}
|
||||
RegExpTree* operand = ParseClassSetOperand(
|
||||
builder, &last_type, ranges, strings, &character CHECK_FAILED);
|
||||
RegExpTree* operand = ParseClassSetOperand(builder, &last_type, ranges,
|
||||
strings CHECK_FAILED);
|
||||
if (operand != nullptr) {
|
||||
may_contain_strings |= MayContainStrings(last_type, operand);
|
||||
// Add the range we started building as operand and reset the current
|
||||
// range.
|
||||
if (!ranges->is_empty() || !strings->empty()) {
|
||||
if (needs_case_folding) {
|
||||
CharacterRange::Canonicalize(ranges);
|
||||
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
|
||||
}
|
||||
may_contain_strings |= !strings->empty();
|
||||
operands->Add(
|
||||
zone()->template New<RegExpClassSetOperand>(ranges, strings),
|
||||
|
@ -2814,12 +2742,12 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
|
|||
return ReportError(RegExpError::kUnterminatedCharacterClass);
|
||||
}
|
||||
|
||||
if (last_type == ClassSetOperandType::kClassSetCharacter) {
|
||||
AddMaybeSimpleCaseFoldedRange(ranges, CharacterRange::Singleton(character));
|
||||
}
|
||||
|
||||
// Add the range we started building as operand.
|
||||
if (!ranges->is_empty() || !strings->empty()) {
|
||||
if (needs_case_folding) {
|
||||
CharacterRange::Canonicalize(ranges);
|
||||
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
|
||||
}
|
||||
may_contain_strings |= !strings->empty();
|
||||
operands->Add(zone()->template New<RegExpClassSetOperand>(ranges, strings),
|
||||
zone());
|
||||
|
@ -2845,7 +2773,8 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
|
|||
may_contain_strings, operands);
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassIntersection
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassIntersection
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
|
||||
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
|
||||
|
@ -2886,7 +2815,8 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
|
|||
may_contain_strings, operands);
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#prod-ClassSubtraction
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSubtraction
|
||||
template <class CharT>
|
||||
RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
|
||||
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
|
||||
|
@ -2961,16 +2891,12 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
|
|||
ClassSetOperandType operand_type;
|
||||
CharacterClassStrings* strings =
|
||||
zone()->template New<CharacterClassStrings>(zone());
|
||||
base::uc32 character;
|
||||
RegExpTree* operand = ParseClassSetOperand(
|
||||
builder, &operand_type, ranges, strings, &character CHECK_FAILED);
|
||||
RegExpTree* operand = ParseClassSetOperand(builder, &operand_type, ranges,
|
||||
strings CHECK_FAILED);
|
||||
switch (current()) {
|
||||
case '-':
|
||||
if (Next() == '-') {
|
||||
if (operand == nullptr) {
|
||||
if (operand_type == ClassSetOperandType::kClassSetCharacter) {
|
||||
ranges->Add(CharacterRange::Singleton(character), zone());
|
||||
}
|
||||
operand =
|
||||
zone()->template New<RegExpClassSetOperand>(ranges, strings);
|
||||
}
|
||||
|
@ -2982,9 +2908,6 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
|
|||
case '&':
|
||||
if (Next() == '&') {
|
||||
if (operand == nullptr) {
|
||||
if (operand_type == ClassSetOperandType::kClassSetCharacter) {
|
||||
ranges->Add(CharacterRange::Singleton(character), zone());
|
||||
}
|
||||
operand =
|
||||
zone()->template New<RegExpClassSetOperand>(ranges, strings);
|
||||
}
|
||||
|
@ -2993,7 +2916,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
|
|||
}
|
||||
}
|
||||
return ParseClassUnion(builder, is_negated, operand, operand_type, ranges,
|
||||
strings, character);
|
||||
strings);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3124,7 +3047,7 @@ bool RegExpBuilder::AddQuantifierToAtom(
|
|||
RegExpTree* atom = text_builder().PopLastAtom();
|
||||
if (atom != nullptr) {
|
||||
FlushText();
|
||||
} else if (!terms_.empty()) {
|
||||
} else if (terms_.size() > 0) {
|
||||
atom = terms_.back();
|
||||
terms_.pop_back();
|
||||
if (atom->IsLookaround()) {
|
||||
|
|
|
@ -87,8 +87,8 @@ class RegExp final : public AllStatic {
|
|||
RegExpFlags flags, uint32_t backtrack_limit);
|
||||
|
||||
// Ensures that a regexp is fully compiled and ready to be executed on a
|
||||
// subject string. Returns true on success. Throw and return false on
|
||||
// failure.
|
||||
// subject string. Returns true on success. Return false on failure, and
|
||||
// then an exception will be pending.
|
||||
V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
|
||||
Handle<JSRegExp> re,
|
||||
Handle<String> subject);
|
||||
|
@ -211,16 +211,14 @@ class RegExpResultsCache final : public AllStatic {
|
|||
|
||||
// Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
|
||||
// On success, the returned result is guaranteed to be a COW-array.
|
||||
static Tagged<Object> Lookup(Heap* heap, Tagged<String> key_string,
|
||||
Tagged<Object> key_pattern,
|
||||
Tagged<FixedArray>* last_match_out,
|
||||
ResultsCacheType type);
|
||||
static Object Lookup(Heap* heap, String key_string, Object key_pattern,
|
||||
FixedArray* last_match_out, ResultsCacheType type);
|
||||
// Attempt to add value_array to the cache specified by type. On success,
|
||||
// value_array is turned into a COW-array.
|
||||
static void Enter(Isolate* isolate, Handle<String> key_string,
|
||||
Handle<Object> key_pattern, Handle<FixedArray> value_array,
|
||||
Handle<FixedArray> last_match_cache, ResultsCacheType type);
|
||||
static void Clear(Tagged<FixedArray> cache);
|
||||
static void Clear(FixedArray cache);
|
||||
|
||||
static constexpr int kRegExpResultsCacheSize = 0x100;
|
||||
|
||||
|
|
|
@ -82,6 +82,29 @@ const icu::UnicodeSet& RegExpCaseFolding::SpecialAddSet() {
|
|||
return set.Pointer()->set;
|
||||
}
|
||||
|
||||
icu::UnicodeSet BuildUnicodeNonSimpleCloseOverSet() {
|
||||
icu::UnicodeSet set;
|
||||
set.add(0x390);
|
||||
set.add(0x3b0);
|
||||
set.add(0x1fd3);
|
||||
set.add(0x1fe3);
|
||||
set.add(0xfb05, 0xfb06);
|
||||
set.freeze();
|
||||
return set;
|
||||
}
|
||||
|
||||
struct UnicodeNonSimpleCloseOverSetData {
|
||||
UnicodeNonSimpleCloseOverSetData() : set(BuildUnicodeNonSimpleCloseOverSet()) {}
|
||||
const icu::UnicodeSet set;
|
||||
};
|
||||
|
||||
//static
|
||||
const icu::UnicodeSet& RegExpCaseFolding::UnicodeNonSimpleCloseOverSet() {
|
||||
static base::LazyInstance<UnicodeNonSimpleCloseOverSetData>::type set =
|
||||
LAZY_INSTANCE_INITIALIZER;
|
||||
return set.Pointer()->set;
|
||||
}
|
||||
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
|
|
@ -70,11 +70,21 @@ namespace internal {
|
|||
// another character. Characters that match no other characters in
|
||||
// their equivalence class are added to IgnoreSet. Characters that
|
||||
// match at least one other character are added to SpecialAddSet.
|
||||
//
|
||||
// For unicode ignoreCase ("iu" and "iv"),
|
||||
// UnicodeSet::closeOver(USET_CASE_INSENSITIVE) adds all characters that are in
|
||||
// the same equivalence class. This includes characters that are in the same
|
||||
// equivalence class using full case folding. According to the spec, only
|
||||
// simple case folding shall be considered. We therefore create
|
||||
// UnicodeNonSimpleCloseOverSet containing all characters for which
|
||||
// UnicodeSet::closeOver adds characters that are not simple case folds. This
|
||||
// set should be used similar to IgnoreSet described above.
|
||||
|
||||
class RegExpCaseFolding final : public AllStatic {
|
||||
public:
|
||||
static const icu::UnicodeSet& IgnoreSet();
|
||||
static const icu::UnicodeSet& SpecialAddSet();
|
||||
static const icu::UnicodeSet& UnicodeNonSimpleCloseOverSet();
|
||||
|
||||
// This implements ECMAScript 2020 21.2.2.8.2 (Runtime Semantics:
|
||||
// Canonicalize) step 3, which is used to determine whether
|
||||
|
|
|
@ -14,13 +14,9 @@ include("../js-cxxflags.mozbuild")
|
|||
|
||||
CXXFLAGS += ["-Wno-error=type-limits", "-Wno-error=return-type"]
|
||||
|
||||
# Suppress spurious warnings in third-party code.
|
||||
# See bug 1810584 and bug 1879225.
|
||||
# Suppress spurious warnings in third-party code. See bug 1810584.
|
||||
if CONFIG["CC_TYPE"] == "gcc":
|
||||
CXXFLAGS += ["-Wno-error=nonnull", "-Wno-narrowing"]
|
||||
if CONFIG["CC_TYPE"] == "clang":
|
||||
CXXFLAGS += ["-Wno-c++11-narrowing"]
|
||||
|
||||
CXXFLAGS += ["-Wno-error=nonnull"]
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
"imported/regexp-bytecode-generator.cc",
|
||||
|
|
|
@ -9,8 +9,8 @@ origin:
|
|||
description: A fast regular expression engine from V8
|
||||
url: https://v8.dev
|
||||
|
||||
release: ec89cca93594ef6ae683d6608904cd5e30378115 (Wed Feb 07 21:40:08 2024).
|
||||
revision: ec89cca93594ef6ae683d6608904cd5e30378115
|
||||
release: 30a887aeb92153885619d8bb9fa57cda7adf9276 (Thu Jul 06 11:42:30 2023).
|
||||
revision: 30a887aeb92153885619d8bb9fa57cda7adf9276
|
||||
|
||||
license: BSD-3-Clause
|
||||
license-file: LICENSE.v8
|
||||
|
|
|
@ -376,8 +376,6 @@ DefaultJitOptions::DefaultJitOptions() {
|
|||
|
||||
// ***** Irregexp shim flags *****
|
||||
|
||||
// Whether the stage 3 regexp modifiers proposal is enabled.
|
||||
SET_DEFAULT(js_regexp_modifiers, false);
|
||||
// V8 uses this for differential fuzzing to handle stack overflows.
|
||||
// We address the same problem in StackLimitCheck::HasOverflowed.
|
||||
SET_DEFAULT(correctness_fuzzer_suppressions, false);
|
||||
|
|
|
@ -143,7 +143,6 @@ struct DefaultJitOptions {
|
|||
// Irregexp shim flags
|
||||
bool correctness_fuzzer_suppressions;
|
||||
bool enable_regexp_unaligned_accesses;
|
||||
bool js_regexp_modifiers;
|
||||
bool regexp_possessive_quantifier;
|
||||
bool regexp_optimization;
|
||||
bool regexp_peephole_optimization;
|
||||
|
|
|
@ -298,6 +298,7 @@ void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
|
|||
zeroLastIndex(cx);
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(JS_JITSPEW)
|
||||
template <typename KnownF, typename UnknownF>
|
||||
void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
|
||||
uint8_t raw = flags.value();
|
||||
|
@ -335,14 +336,6 @@ void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
|
|||
}
|
||||
}
|
||||
|
||||
std::ostream& JS::operator<<(std::ostream& os, RegExpFlags flags) {
|
||||
ForEachRegExpFlag(
|
||||
flags, [&](const char* name, const char* c) { os << c; },
|
||||
[&](uint8_t value) { os << '?'; });
|
||||
return os;
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(JS_JITSPEW)
|
||||
void RegExpObject::dumpOwnFields(js::JSONPrinter& json) const {
|
||||
{
|
||||
js::GenericPrinter& out = json.beginStringProperty("source");
|
||||
|
@ -1127,7 +1120,36 @@ static bool ParseRegExpFlags(const CharT* chars, size_t length,
|
|||
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
uint8_t flag;
|
||||
if (!JS::MaybeParseRegExpFlag(chars[i], &flag) || *flagsOut & flag) {
|
||||
switch (chars[i]) {
|
||||
case 'd':
|
||||
flag = RegExpFlag::HasIndices;
|
||||
break;
|
||||
case 'g':
|
||||
flag = RegExpFlag::Global;
|
||||
break;
|
||||
case 'i':
|
||||
flag = RegExpFlag::IgnoreCase;
|
||||
break;
|
||||
case 'm':
|
||||
flag = RegExpFlag::Multiline;
|
||||
break;
|
||||
case 's':
|
||||
flag = RegExpFlag::DotAll;
|
||||
break;
|
||||
case 'u':
|
||||
flag = RegExpFlag::Unicode;
|
||||
break;
|
||||
case 'v':
|
||||
flag = RegExpFlag::UnicodeSets;
|
||||
break;
|
||||
case 'y':
|
||||
flag = RegExpFlag::Sticky;
|
||||
break;
|
||||
default:
|
||||
*invalidFlag = chars[i];
|
||||
return false;
|
||||
}
|
||||
if (*flagsOut & flag) {
|
||||
*invalidFlag = chars[i];
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -103,7 +103,7 @@ class RegExpShared
|
|||
|
||||
size_t byteCodeLength() const {
|
||||
MOZ_ASSERT(byteCode);
|
||||
return byteCode->length();
|
||||
return byteCode->length;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче