зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1634135: Throw 'regexp too big' errors properly r=mgaudet
If a regular expression is too big, the assembler may fail with RegExpError::kTooLarge. When it does so, we want to throw an error: "regexp too big". Until the most recent reimport of irregexp, we were actually reporting an OOM in these cases, because `CompilationResult::code` was default-constructed as an UndefinedValue and we took the "OOM in GetCode" path. Now `CompilationResult::code` is a Handle, so we crash if we try to access the value. Making the situation slightly more complicated is the fact that we still have a macroassembler live, which means that we can't GC, which means that we can't report an error. The old code used an AutoSuppressGC for this (https://searchfox.org/mozilla-central/source/js/src/irregexp/RegExpEngine.cpp#1703), but that seems like an extremely blunt instrument. Instead, I've refactored `CompilePattern` to call a separate `Assemble` function. This means that we clean up the macroassembler before we call `JS_ReportErrorASCII`. The new function is a straight copy-paste of the old code, except for error handling and `.` to `->` conversions for the values that are now passed by pointer. Note that the order of checks has changed after calling `compiler->Assemble(...)`: now we check `result.Succeeded()` before examining `result.code`. We also change the shared labels in SMRegExpMacroAssembler to be NonAssertingLabels. This suppresses the assertion in the Label destructor that a label is not used without being bound. The assertion is already suppressed for OOM (https://searchfox.org/mozilla-central/source/js/src/jit/Label.h#82-86), which is why we did not trigger it previously. Differential Revision: https://phabricator.services.mozilla.com/D73758
This commit is contained in:
Родитель
4c2823c938
Коммит
e41d82d09e
|
@ -329,6 +329,121 @@ static void SampleCharacters(HandleLinearString input,
|
|||
}
|
||||
}
|
||||
|
||||
// Outcome of the static Assemble() helper. Assemble() does not report errors
// itself; CompilePattern switches on this value and reports on its behalf.
enum class AssembleResult {
|
||||
// Code was generated and handed over to the RegExpShared.
Success,
|
||||
// compiler->Assemble() did not succeed; CompilePattern reports
// "regexp too big".
TooLarge,
|
||||
// An allocation failed; CompilePattern calls ReportOutOfMemory.
OutOfMemory,
|
||||
};
|
||||
|
||||
// Generates code for a parsed regexp and installs it on |re|.
//
// Creates the macroassembler (an SMRegExpMacroAssembler when |useNativeCode|
// is set, otherwise a RegExpBytecodeGenerator), configures it from |pattern|,
// |data->tree| and the flags on |re|, runs compiler->Assemble(), and on
// success hands the resulting jitcode (plus its tables) or bytecode to |re|.
//
// Never reports an error on |cx|. Failures come back as
// AssembleResult::TooLarge or AssembleResult::OutOfMemory so that the caller
// can report them after the macroassembler created here has been destroyed.
static MOZ_MUST_USE AssembleResult Assemble(JSContext* cx,
|
||||
RegExpCompiler* compiler,
|
||||
RegExpCompileData* data,
|
||||
MutableHandleRegExpShared re,
|
||||
HandleAtom pattern, Zone* zone,
|
||||
bool useNativeCode, bool isLatin1) {
|
||||
// Because we create a StackMacroAssembler, this function is not allowed
|
||||
// to GC. If needed, we allocate and throw errors in the caller.
|
||||
// |jctx| and |stack_masm| are only populated on the native-code path below.
Maybe<jit::JitContext> jctx;
|
||||
Maybe<js::jit::StackMacroAssembler> stack_masm;
|
||||
UniquePtr<RegExpMacroAssembler> masm;
|
||||
if (useNativeCode) {
|
||||
NativeRegExpMacroAssembler::Mode mode =
|
||||
isLatin1 ? NativeRegExpMacroAssembler::LATIN1
|
||||
: NativeRegExpMacroAssembler::UC16;
|
||||
// If we are compiling native code, we need a macroassembler,
|
||||
// which needs a jit context.
|
||||
jctx.emplace(cx, nullptr);
|
||||
stack_masm.emplace();
|
||||
// Two capture registers for every pair counted by re->pairCount().
uint32_t num_capture_registers = re->pairCount() * 2;
|
||||
masm = MakeUnique<SMRegExpMacroAssembler>(cx, stack_masm.ref(), zone, mode,
|
||||
num_capture_registers);
|
||||
} else {
|
||||
masm = MakeUnique<RegExpBytecodeGenerator>(cx->isolate, zone);
|
||||
}
|
||||
// A null |masm| from either branch is treated as an allocation failure.
if (!masm) {
|
||||
return AssembleResult::OutOfMemory;
|
||||
}
|
||||
|
||||
// Patterns longer than kRegExpTooLargeToOptimize put the assembler in
// slow-safe mode and turn compiler optimization off. Optimization is only
// ever turned off here: if it was already disabled it stays disabled.
bool isLargePattern =
|
||||
pattern->length() > v8::internal::RegExp::kRegExpTooLargeToOptimize;
|
||||
masm->set_slow_safe(isLargePattern);
|
||||
if (compiler->optimize()) {
|
||||
compiler->set_optimize(!isLargePattern);
|
||||
}
|
||||
|
||||
// When matching a regexp with known maximum length that is anchored
|
||||
// at the end, we may be able to skip the beginning of long input
|
||||
// strings. This decision is made here because it depends on
|
||||
// information in the AST that isn't replicated in the Node
|
||||
// structure used inside the compiler.
|
||||
bool is_start_anchored = data->tree->IsAnchoredAtStart();
|
||||
bool is_end_anchored = data->tree->IsAnchoredAtEnd();
|
||||
int max_length = data->tree->max_match();
|
||||
static const int kMaxBacksearchLimit = 1024;
|
||||
// Only end-anchored, non-start-anchored, non-sticky regexps whose maximum
// match length is below the limit get the skip-from-end treatment.
if (is_end_anchored && !is_start_anchored && !re->sticky() &&
|
||||
max_length < kMaxBacksearchLimit) {
|
||||
masm->SetCurrentPositionFromEnd(max_length);
|
||||
}
|
||||
|
||||
// Global regexps pick a global mode. A non-zero minimum match length takes
// precedence over the unicode flag; non-global regexps set no mode at all.
if (re->global()) {
|
||||
RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
|
||||
if (data->tree->min_match() > 0) {
|
||||
mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
|
||||
} else if (re->unicode()) {
|
||||
mode = RegExpMacroAssembler::GLOBAL_UNICODE;
|
||||
}
|
||||
|
||||
masm->set_global_mode(mode);
|
||||
}
|
||||
|
||||
// The masm tracer works as a thin wrapper around another macroassembler.
|
||||
// |masm_ptr| is what the compiler is given; |masm| keeps owning the real
// assembler even when the DEBUG-only tracer is interposed.
RegExpMacroAssembler* masm_ptr = masm.get();
|
||||
#ifdef DEBUG
|
||||
UniquePtr<RegExpMacroAssembler> tracer_masm;
|
||||
if (jit::JitOptions.traceRegExpAssembler) {
|
||||
tracer_masm = MakeUnique<RegExpMacroAssemblerTracer>(cx->isolate, masm_ptr);
|
||||
masm_ptr = tracer_masm.get();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compile the regexp.
|
||||
V8HandleString wrappedPattern(v8::internal::String(pattern), cx->isolate);
|
||||
RegExpCompiler::CompilationResult result = compiler->Assemble(
|
||||
cx->isolate, masm_ptr, data->node, data->capture_count, wrappedPattern);
|
||||
// Succeeded() must be checked first: |result.code| is only examined once
// the compilation is known to have succeeded.
if (!result.Succeeded()) {
|
||||
MOZ_ASSERT(result.error == RegExpError::kTooLarge);
|
||||
return AssembleResult::TooLarge;
|
||||
}
|
||||
if (result.code->value().isUndefined()) {
|
||||
// SMRegExpMacroAssembler::GetCode returns undefined on OOM.
|
||||
MOZ_ASSERT(useNativeCode);
|
||||
return AssembleResult::OutOfMemory;
|
||||
}
|
||||
|
||||
re->updateMaxRegisters(result.num_registers);
|
||||
if (useNativeCode) {
|
||||
// Transfer ownership of the tables from the macroassembler to the
|
||||
// RegExpShared.
|
||||
// The downcast relies on the native-code branch above having created an
// SMRegExpMacroAssembler. It goes through |masm|, not |masm_ptr|, because
// the latter may point at the tracer wrapper.
SMRegExpMacroAssembler::TableVector& tables =
|
||||
static_cast<SMRegExpMacroAssembler*>(masm.get())->tables();
|
||||
// NOTE(review): a failure part-way through returns without undoing the
// addTable calls that already succeeded.
for (uint32_t i = 0; i < tables.length(); i++) {
|
||||
if (!re->addTable(std::move(tables[i]))) {
|
||||
return AssembleResult::OutOfMemory;
|
||||
}
|
||||
}
|
||||
re->setJitCode(v8::internal::Code::cast(*result.code).inner(), isLatin1);
|
||||
} else {
|
||||
// Transfer ownership of the bytecode from the HandleScope to the
|
||||
// RegExpShared.
|
||||
ByteArray bytecode =
|
||||
v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
|
||||
// Read the length before release() gives the pointer away to |re|.
uint32_t length = bytecode->length;
|
||||
re->setByteCode(bytecode.release(), isLatin1);
|
||||
js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
|
||||
}
|
||||
|
||||
return AssembleResult::Success;
|
||||
}
|
||||
|
||||
bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
|
||||
HandleLinearString input, RegExpShared::CodeKind codeKind) {
|
||||
RootedAtom pattern(cx, re->getSource());
|
||||
|
@ -397,108 +512,17 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
|
|||
bool useNativeCode = codeKind == RegExpShared::CodeKind::Jitcode;
|
||||
MOZ_ASSERT_IF(useNativeCode, IsNativeRegExpEnabled());
|
||||
|
||||
Maybe<jit::JitContext> jctx;
|
||||
Maybe<js::jit::StackMacroAssembler> stack_masm;
|
||||
UniquePtr<RegExpMacroAssembler> masm;
|
||||
if (useNativeCode) {
|
||||
NativeRegExpMacroAssembler::Mode mode =
|
||||
isLatin1 ? NativeRegExpMacroAssembler::LATIN1
|
||||
: NativeRegExpMacroAssembler::UC16;
|
||||
// If we are compiling native code, we need a macroassembler,
|
||||
// which needs a jit context.
|
||||
jctx.emplace(cx, nullptr);
|
||||
stack_masm.emplace();
|
||||
uint32_t num_capture_registers = re->pairCount() * 2;
|
||||
masm = MakeUnique<SMRegExpMacroAssembler>(cx, stack_masm.ref(), &zone, mode,
|
||||
num_capture_registers);
|
||||
} else {
|
||||
masm = MakeUnique<RegExpBytecodeGenerator>(cx->isolate, &zone);
|
||||
switch (Assemble(cx, &compiler, &data, re, pattern, &zone, useNativeCode,
|
||||
isLatin1)) {
|
||||
case AssembleResult::TooLarge:
|
||||
JS_ReportErrorASCII(cx, "regexp too big");
|
||||
return false;
|
||||
case AssembleResult::OutOfMemory:
|
||||
ReportOutOfMemory(cx);
|
||||
return false;
|
||||
case AssembleResult::Success:
|
||||
break;
|
||||
}
|
||||
if (!masm) {
|
||||
ReportOutOfMemory(cx);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool largePattern =
|
||||
pattern->length() > v8::internal::RegExp::kRegExpTooLargeToOptimize;
|
||||
masm->set_slow_safe(largePattern);
|
||||
if (compiler.optimize()) {
|
||||
compiler.set_optimize(!largePattern);
|
||||
}
|
||||
|
||||
// When matching a regexp with known maximum length that is anchored
|
||||
// at the end, we may be able to skip the beginning of long input
|
||||
// strings. This decision is made here because it depends on
|
||||
// information in the AST that isn't replicated in the Node
|
||||
// structure used inside the compiler.
|
||||
bool is_start_anchored = data.tree->IsAnchoredAtStart();
|
||||
bool is_end_anchored = data.tree->IsAnchoredAtEnd();
|
||||
int max_length = data.tree->max_match();
|
||||
static const int kMaxBacksearchLimit = 1024;
|
||||
if (is_end_anchored && !is_start_anchored && !re->sticky() &&
|
||||
max_length < kMaxBacksearchLimit) {
|
||||
masm->SetCurrentPositionFromEnd(max_length);
|
||||
}
|
||||
|
||||
if (re->global()) {
|
||||
RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
|
||||
if (data.tree->min_match() > 0) {
|
||||
mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
|
||||
} else if (re->unicode()) {
|
||||
mode = RegExpMacroAssembler::GLOBAL_UNICODE;
|
||||
}
|
||||
masm->set_global_mode(mode);
|
||||
}
|
||||
|
||||
// The masm tracer works as a thin wrapper around another macroassembler.
|
||||
RegExpMacroAssembler* masm_ptr = masm.get();
|
||||
#ifdef DEBUG
|
||||
UniquePtr<RegExpMacroAssembler> tracer_masm;
|
||||
if (jit::JitOptions.traceRegExpAssembler) {
|
||||
tracer_masm = MakeUnique<RegExpMacroAssemblerTracer>(cx->isolate, masm_ptr);
|
||||
masm_ptr = tracer_masm.get();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compile the regexp.
|
||||
V8HandleString wrappedPattern(v8::internal::String(pattern), cx->isolate);
|
||||
RegExpCompiler::CompilationResult result = compiler.Assemble(
|
||||
cx->isolate, masm_ptr, data.node, data.capture_count, wrappedPattern);
|
||||
if (result.code->value().isUndefined()) {
|
||||
// SMRegExpMacroAssembler::GetCode returns undefined on OOM.
|
||||
MOZ_ASSERT(useNativeCode);
|
||||
ReportOutOfMemory(cx);
|
||||
return false;
|
||||
}
|
||||
if (!result.Succeeded()) {
|
||||
MOZ_ASSERT(result.error == RegExpError::kTooLarge);
|
||||
JS_ReportErrorASCII(cx, "regexp too big");
|
||||
return false;
|
||||
}
|
||||
|
||||
re->updateMaxRegisters(result.num_registers);
|
||||
if (useNativeCode) {
|
||||
// Transfer ownership of the tables from the macroassembler to the
|
||||
// RegExpShared.
|
||||
SMRegExpMacroAssembler::TableVector& tables =
|
||||
static_cast<SMRegExpMacroAssembler*>(masm.get())->tables();
|
||||
for (uint32_t i = 0; i < tables.length(); i++) {
|
||||
if (!re->addTable(std::move(tables[i]))) {
|
||||
ReportOutOfMemory(cx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
re->setJitCode(v8::internal::Code::cast(*result.code).inner(), isLatin1);
|
||||
} else {
|
||||
// Transfer ownership of the bytecode from the HandleScope to the
|
||||
// RegExpShared.
|
||||
ByteArray bytecode =
|
||||
v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
|
||||
uint32_t length = bytecode->length;
|
||||
re->setByteCode(bytecode.release(), isLatin1);
|
||||
js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -223,13 +223,17 @@ class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
|
|||
js::jit::Register backtrack_stack_pointer_;
|
||||
js::jit::Register temp0_, temp1_, temp2_;
|
||||
|
||||
js::jit::Label entry_label_;
|
||||
js::jit::Label start_label_;
|
||||
js::jit::Label backtrack_label_;
|
||||
js::jit::Label success_label_;
|
||||
js::jit::Label exit_label_;
|
||||
js::jit::Label stack_overflow_label_;
|
||||
js::jit::Label exit_with_exception_label_;
|
||||
// These labels are used in various API calls and bound (if used) in
|
||||
// GetCode. If we abort in the middle of a compilation, as may
|
||||
// happen if a regexp is too big, they may be used but not
|
||||
// bound.
|
||||
js::jit::NonAssertingLabel entry_label_;
|
||||
js::jit::NonAssertingLabel start_label_;
|
||||
js::jit::NonAssertingLabel backtrack_label_;
|
||||
js::jit::NonAssertingLabel success_label_;
|
||||
js::jit::NonAssertingLabel exit_label_;
|
||||
js::jit::NonAssertingLabel stack_overflow_label_;
|
||||
js::jit::NonAssertingLabel exit_with_exception_label_;
|
||||
|
||||
// When we generate the code to push a backtrack label's address
|
||||
// onto the backtrack stack, we don't know its final address. We
|
||||
|
|
Загрузка…
Ссылка в новой задаче