Bug 1362154: Part 4: Store named capture information in RegExpShared r=mgaudet

If a regexp contains any named captures, the irregexp parser returns information about those captures in the form of an ArrayObject, where even elements store the capture name and odd elements store the corresponding capture index. We split this information into two parts. The names are used to create a template object with a property for each capture name. The capture indices are stored as a heap-allocated array. Both of these are stored on the RegExpShared.

In the next patch, we use the template object to create a `groups` object for the regexp match result, and use the array of capture indices to populate that object.

Differential Revision: https://phabricator.services.mozilla.com/D76036
This commit is contained in:
Iain Ireland 2020-05-20 21:04:01 +00:00
Родитель 803c01420d
Коммит 960ad8afed
4 изменённых файлов: 95 добавлений и 1 удалений

Просмотреть файл

@ -111,6 +111,7 @@ enum class ZealMode {
_(RareArgumentsData) \
_(RegExpStatics) \
_(RegExpSharedBytecode) \
_(RegExpSharedNamedCaptureData) \
_(TypedArrayElements) \
_(TypeDescrTraceList) \
_(NativeIterator) \

Просмотреть файл

@ -497,7 +497,15 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
}
}
// Add one to account for the whole-match capture
re->useRegExpMatch(data.capture_count + 1);
uint32_t pairCount = data.capture_count + 1;
re->useRegExpMatch(pairCount);
if (!data.capture_name_map.is_null()) {
RootedNativeObject namedCaptures(cx, data.capture_name_map->inner());
if (!RegExpShared::initializeNamedCaptures(cx, re, namedCaptures)) {
return false;
}
}
}
MOZ_ASSERT(re->kind() == RegExpShared::Kind::RegExp);

Просмотреть файл

@ -961,6 +961,7 @@ void RegExpShared::traceChildren(JSTracer* trc) {
for (auto& comp : compilationArray) {
TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
}
TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
}
#else
for (auto& comp : compilationArray) {
@ -985,6 +986,13 @@ void RegExpShared::finalize(JSFreeOp* fop) {
fop->free_(this, comp.byteCode, length, MemoryUse::RegExpSharedBytecode);
}
}
#ifdef ENABLE_NEW_REGEXP
if (namedCaptureIndices_) {
size_t length = numNamedCaptures() * sizeof(uint32_t);
fop->free_(this, namedCaptureIndices_, length,
MemoryUse::RegExpSharedNamedCaptureData);
}
#endif
tables.~JitCodeTables();
}
@ -1126,6 +1134,66 @@ void RegExpShared::useRegExpMatch(size_t pairCount) {
ticks_ = jit::JitOptions.regexpWarmUpThreshold;
}
// Stores the named-capture information produced by the irregexp parser on
// |re|.
//
// |namedCaptures| holds dense elements laid out as (name, index) pairs: even
// elements are the capture names and odd elements are the corresponding
// capture indices. The names become properties of a freshly created template
// object; the indices are copied into a malloc'ed array. Both are stored on
// |re|, together with the number of named captures.
//
// Must be called at most once per RegExpShared, and only for regexps of kind
// RegExp (both are asserted). Returns false on failure, in which case |re| is
// left unmodified. Failures from the object/property helpers are propagated
// as-is; failure to allocate the index array is reported as OOM on |cx|.
/* static */
bool RegExpShared::initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
                                           HandleNativeObject namedCaptures) {
  MOZ_ASSERT(re->kind() == RegExpShared::Kind::RegExp);
  MOZ_ASSERT(!re->groupsTemplate_);
  MOZ_ASSERT(!re->namedCaptureIndices_);

  // The irregexp parser returns named capture information in the form
  // of an ArrayObject, where even elements store the capture name and
  // odd elements store the corresponding capture index. We create a
  // template object with a property for each capture name, and store
  // the capture indices as a heap-allocated array.
  MOZ_ASSERT(namedCaptures->getDenseInitializedLength() % 2 == 0);
  uint32_t numNamedCaptures = namedCaptures->getDenseInitializedLength() / 2;

  // Create a plain template object.
  RootedPlainObject templateObject(
      cx, NewObjectWithGivenProto<PlainObject>(cx, nullptr));
  if (!templateObject) {
    return false;
  }

  // Create a new group for the template.
  Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
  ObjectGroup* group = ObjectGroupRealm::makeGroup(
      cx, templateObject->realm(), templateObject->getClass(), proto);
  if (!group) {
    return false;
  }
  templateObject->setGroup(group);

  // Initialize the properties of the template. The empty string is only a
  // placeholder value; what matters here is the set of property names.
  RootedValue dummyString(cx, StringValue(cx->runtime()->emptyString));
  for (uint32_t i = 0; i < numNamedCaptures; i++) {
    RootedString name(cx, namedCaptures->getDenseElement(i * 2).toString());
    RootedId id(cx, NameToId(name->asAtom().asPropertyName()));
    if (!NativeDefineDataProperty(cx, templateObject, id, dummyString,
                                  JSPROP_ENUMERATE)) {
      return false;
    }
  }

  // Allocate the capture index array. js_malloc does not report failure on
  // the context, so do it explicitly: returning false without a pending
  // exception would leave the caller with an unexplained failure.
  size_t arraySize = size_t(numNamedCaptures) * sizeof(uint32_t);
  uint32_t* captureIndices = static_cast<uint32_t*>(js_malloc(arraySize));
  if (!captureIndices) {
    ReportOutOfMemory(cx);
    return false;
  }

  // Populate the capture index array
  for (uint32_t i = 0; i < numNamedCaptures; i++) {
    captureIndices[i] = namedCaptures->getDenseElement(i * 2 + 1).toInt32();
  }

  // Publish everything on |re| only after all fallible steps have succeeded.
  re->numNamedCaptures_ = numNamedCaptures;
  re->groupsTemplate_ = templateObject;
  re->namedCaptureIndices_ = captureIndices;
  // Account for the malloc'ed array against the cell; finalize() frees it
  // under the same MemoryUse tag.
  js::AddCellMemory(re, arraySize, MemoryUse::RegExpSharedNamedCaptureData);
  return true;
}
void RegExpShared::tierUpTick() {
MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
if (ticks_ > 0) {

Просмотреть файл

@ -148,6 +148,12 @@ class RegExpShared : public gc::TenuredCell {
bool canStringMatch = false;
#endif
#ifdef ENABLE_NEW_REGEXP
// Named-capture data, filled in by initializeNamedCaptures(). All three
// fields stay zero/null for a RegExpShared on which it was never called.

// Number of named captures; also the number of entries in
// namedCaptureIndices_.
uint32_t numNamedCaptures_ = {};
// malloc'ed array holding the capture index of each named capture. Freed in
// finalize().
uint32_t* namedCaptureIndices_ = {};
// Template object with one property per capture name. Traced in
// traceChildren().
GCPtr<PlainObject*> groupsTemplate_ = {};
#endif
static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
// Tables referenced by JIT code.
@ -209,6 +215,10 @@ class RegExpShared : public gc::TenuredCell {
// Use the regular expression engine for this regexp.
void useRegExpMatch(size_t parenCount);
// Store named-capture information on |re|. |namedCaptures| holds
// (name, index) pairs as dense elements: even elements are capture names,
// odd elements are the corresponding capture indices. Returns false on
// failure.
static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
HandleNativeObject namedCaptures);
// Template object with one property per named capture, or null if no
// named-capture information has been stored on this RegExpShared.
PlainObject* getGroupsTemplate() { return groupsTemplate_; }
void tierUpTick();
bool markedForTierUp() const;
@ -229,6 +239,13 @@ class RegExpShared : public gc::TenuredCell {
maxRegisters_ = std::max(maxRegisters_, numRegisters);
}
// Number of named captures recorded for this regexp (0 if none were stored).
uint32_t numNamedCaptures() const { return numNamedCaptures_; }
// Returns the capture index stored for the |idx|-th named capture.
// |idx| must be below numNamedCaptures() and the index array must have been
// allocated; both are asserted.
int32_t getNamedCaptureIndex(uint32_t idx) const {
  MOZ_ASSERT(idx < numNamedCaptures());
  MOZ_ASSERT(namedCaptureIndices_);
  const uint32_t* indices = namedCaptureIndices_;
  return indices[idx];
}
#endif
JSAtom* getSource() const { return headerAndSource.ptr(); }