Bug 1703740: Update irregexp r=mgaudet

Pulling in some small upstream fixes:

Bugs involving quantifiers inside look-around assertions
https://bugs.chromium.org/p/v8/issues/detail?id=11290
https://bugs.chromium.org/p/v8/issues/detail?id=11616

Allocating unnecessary memory in RegExpStack
https://bugs.chromium.org/p/v8/issues/detail?id=11540

--trace-regexp-assembler broken by adding RISC-V support
https://bugs.chromium.org/p/v8/issues/detail?id=11572

Differential Revision: https://phabricator.services.mozilla.com/D111334
This commit is contained in:
Iain Ireland 2021-04-08 22:36:18 +00:00
Родитель 2ce6674095
Коммит 9eda529c4b
8 изменённых файлов: 61 добавлений и 35 удалений

Просмотреть файл

@ -1,2 +1,2 @@
Imported using import-irregexp.py from:
https://github.com/v8/v8/tree/c5fd776de594f1f9f87c82f4050075a561872a2a/src/regexp
https://github.com/v8/v8/tree/a6a27731f63596de76001c9ff57dae45fd987fa1/src/regexp

Просмотреть файл

@ -1430,9 +1430,11 @@ EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue.
if (read_backward()) {
// Can't do anything special for a backward loop, so return the basic values
// that we got during analysis.
return *eats_at_least_info();
// The eats_at_least value is not used if reading backward. The
// EatsAtLeastPropagator should've zeroed it as well.
DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0);
DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0);
return {};
}
// Figure out how much the loop body itself eats, not including anything in
@ -3526,14 +3528,23 @@ class EatsAtLeastPropagator : public AllStatic {
}
static void VisitAction(ActionNode* that) {
// POSITIVE_SUBMATCH_SUCCESS rewinds input, so we must not consider
// successor nodes for eats_at_least. SET_REGISTER_FOR_LOOP indicates a loop
// entry point, which means the loop body will run at least the minimum
// number of times before the continuation case can run. Otherwise the
// current node eats at least as much as its successor.
// - BEGIN_SUBMATCH and POSITIVE_SUBMATCH_SUCCESS wrap lookarounds.
// Lookarounds rewind input, so their eats_at_least value must not
// propagate to surroundings.
// TODO(jgruber): Instead of resetting EAL to 0 at lookaround boundaries,
// analysis should instead skip over the lookaround and look at whatever
// follows the lookaround. A simple solution would be to store a pointer to
// the associated POSITIVE_SUBMATCH_SUCCESS node in the BEGIN_SUBMATCH
// node, and use that during analysis.
// - SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
// loop body will run at least the minimum number of times before the
// continuation case can run. Otherwise the current node eats at least as
// much as its successor.
switch (that->action_type()) {
case ActionNode::BEGIN_SUBMATCH:
case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
break; // Was already initialized to zero.
DCHECK(that->eats_at_least_info()->IsZero());
break;
case ActionNode::SET_REGISTER_FOR_LOOP:
that->set_eats_at_least_info(
that->on_success()->EatsAtLeastFromLoopEntry());
@ -3555,7 +3566,10 @@ class EatsAtLeastPropagator : public AllStatic {
}
static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
if (!that->read_backward()) {
that->set_eats_at_least_info(
*that->continue_node()->eats_at_least_info());
}
}
// Visitor hook for the loop alternative of a LoopChoiceNode. The body is
// empty: nothing is written to |that| from this hook.
// NOTE(review): presumably the eats_at_least info for loops is filled in
// elsewhere (e.g. via the continue alternative) - confirm against the callers.
static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}

Просмотреть файл

@ -12,8 +12,6 @@ namespace internal {
// -------------------------------------------------------------------
// Dot/dotty output
#ifdef DEBUG
class DotPrinterImpl : public NodeVisitor {
public:
explicit DotPrinterImpl(std::ostream& os) : os_(os) {}
@ -238,14 +236,10 @@ void DotPrinterImpl::VisitAction(ActionNode* that) {
Visit(successor);
}
#endif // DEBUG
void DotPrinter::DotPrint(const char* label, RegExpNode* node) {
#ifdef DEBUG
StdoutStream os;
DotPrinterImpl printer(os);
printer.PrintNode(label, node);
#endif // DEBUG
}
} // namespace internal

Просмотреть файл

@ -11,11 +11,8 @@ namespace internal {
RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer(
Isolate* isolate, RegExpMacroAssembler* assembler)
: RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) {
IrregexpImplementation type = assembler->Implementation();
DCHECK_LT(type, 9);
const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "S390",
"PPC", "X64", "X87", "Bytecode"};
PrintF("RegExpMacroAssembler%s();\n", impl_names[type]);
PrintF("RegExpMacroAssembler%s();\n",
ImplementationToString(assembler->Implementation()));
}
RegExpMacroAssemblerTracer::~RegExpMacroAssemblerTracer() = default;

Просмотреть файл

@ -39,19 +39,33 @@ class RegExpMacroAssembler {
static constexpr int kUseCharactersValue = -1;
// X-macro list of the irregexp implementations. Invoke it with a macro
// V(Name) to stamp out one entry per implementation. Below, it is expanded
// twice: once to generate the k<Name>Implementation enumerators and once to
// build the name table in ImplementationToString(), so both expansions share
// the order given here. The list is #undef'd after those expansions.
#define IMPLEMENTATIONS_LIST(V) \
V(IA32) \
V(ARM) \
V(ARM64) \
V(MIPS) \
V(RISCV) \
V(S390) \
V(PPC) \
V(X64) \
V(Bytecode)
enum IrregexpImplementation {
kIA32Implementation,
kARMImplementation,
kARM64Implementation,
kMIPSImplementation,
kRISCVImplementation,
kS390Implementation,
kPPCImplementation,
kX64Implementation,
kX87Implementation,
kBytecodeImplementation
#define V(Name) k##Name##Implementation,
IMPLEMENTATIONS_LIST(V)
#undef V
};
// Returns the bare name of |impl| (the "Name" part of k<Name>Implementation).
// The lookup table is expanded from IMPLEMENTATIONS_LIST, so |impl| is used
// directly as the index into it.
inline const char* ImplementationToString(IrregexpImplementation impl) {
#define IMPLEMENTATION_NAME_ENTRY(Name) #Name,
  static constexpr const char* kImplementationNames[] = {
      IMPLEMENTATIONS_LIST(IMPLEMENTATION_NAME_ENTRY)};
#undef IMPLEMENTATION_NAME_ENTRY
  const char* const name = kImplementationNames[impl];
  return name;
}
#undef IMPLEMENTATIONS_LIST
enum StackCheckFlag {
kNoStackLimitCheck = false,
kCheckStackLimit = true

Просмотреть файл

@ -108,6 +108,11 @@ struct EatsAtLeastInfo final {
}
}
// Returns true only when both eats_at_least_from_possibly_start and
// eats_at_least_from_not_start are zero.
bool IsZero() const {
  if (eats_at_least_from_possibly_start != 0) return false;
  return eats_at_least_from_not_start == 0;
}
// Any successful match starting from the current node will consume at least
// this many characters. This does not necessarily mean that there is a
// possible match with exactly this many characters, but we generally try to

Просмотреть файл

@ -10,8 +10,7 @@ namespace internal {
RegExpStackScope::RegExpStackScope(Isolate* isolate)
: regexp_stack_(isolate->regexp_stack()) {
// Initialize, if not already initialized.
regexp_stack_->EnsureCapacity(0);
DCHECK(regexp_stack_->IsValid());
// Irregexp is not reentrant in several ways; in particular, the
// RegExpStackScope is not reentrant since the destructor frees allocated
// memory. Protect against reentrancy here.
@ -78,8 +77,8 @@ void RegExpStack::ThreadLocal::FreeAndInvalidate() {
Address RegExpStack::EnsureCapacity(size_t size) {
if (size > kMaximumStackSize) return kNullAddress;
if (size < kMinimumDynamicStackSize) size = kMinimumDynamicStackSize;
if (thread_local_.memory_size_ < size) {
if (size < kMinimumDynamicStackSize) size = kMinimumDynamicStackSize;
byte* new_memory = NewArray<byte>(size);
if (thread_local_.memory_size_ > 0) {
// Copy original memory into top of new memory.

Просмотреть файл

@ -131,6 +131,9 @@ class RegExpStack {
// you have to call EnsureCapacity before using it again.
void Reset();
// Whether the ThreadLocal storage is currently valid, i.e. it has not been
// invalidated: true while thread_local_.memory_ is non-null.
bool IsValid() const {
  const bool has_memory = thread_local_.memory_ != nullptr;
  return has_memory;
}
ThreadLocal thread_local_;
Isolate* isolate_;