зеркало из https://github.com/mozilla/gecko-dev.git
Bug 579173 - Use ropes to avoid large copies in simple string replace cases (r=lw)
This commit is contained in:
Родитель
4518b8acb7
Коммит
4617f42c0b
176
js/src/jsstr.cpp
176
js/src/jsstr.cpp
|
@ -1424,6 +1424,91 @@ StringMatch(const jschar *text, jsuint textlen,
|
|||
UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
|
||||
}
|
||||
|
||||
static const size_t sRopeMatchThresholdRatioLog2 = 5;
|
||||
|
||||
static jsint
|
||||
RopeMatch(JSString *textstr, const jschar *pat, jsuint patlen)
|
||||
{
|
||||
JS_ASSERT(textstr->isTopNode());
|
||||
|
||||
if (patlen == 0)
|
||||
return 0;
|
||||
if (textstr->length() < patlen)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* List of leaf nodes in the rope. If we run out of memory when trying to
|
||||
* append to this list, we can still fall back to StringMatch, so use the
|
||||
* system allocator so we don't report OOM in that case.
|
||||
*/
|
||||
Vector<JSString *, 16, SystemAllocPolicy> strs;
|
||||
|
||||
/*
|
||||
* We don't want to do rope matching if there is a poor node-to-char ratio,
|
||||
* since this means spending a lot of time in the match loop below. We also
|
||||
* need to build the list of leaf nodes. Do both here: iterate over the
|
||||
* nodes so long as there are not too many.
|
||||
*/
|
||||
size_t textstrlen = textstr->length();
|
||||
size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
|
||||
JSRopeLeafIterator iter(textstr);
|
||||
for (JSString *str = iter.init(); str; str = iter.next()) {
|
||||
if (threshold-- == 0 || !strs.append(str))
|
||||
return StringMatch(textstr->chars(), textstrlen, pat, patlen);
|
||||
}
|
||||
|
||||
/* Absolute offset from the beginning of the logical string textstr. */
|
||||
jsint pos = 0;
|
||||
|
||||
// TODO: consider branching to a simple loop if patlen == 1
|
||||
|
||||
for (JSString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
|
||||
/* First try to match without spanning two nodes. */
|
||||
const jschar *chars;
|
||||
size_t len;
|
||||
(*outerp)->getCharsAndLength(chars, len);
|
||||
jsint matchResult = StringMatch(chars, len, pat, patlen);
|
||||
if (matchResult != -1)
|
||||
return pos + matchResult;
|
||||
|
||||
/* Test the overlap. */
|
||||
JSString **innerp = outerp;
|
||||
|
||||
/*
|
||||
* Start searching at the first place where StringMatch wouldn't have
|
||||
* found the match.
|
||||
*/
|
||||
const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
|
||||
const jschar *const textend = chars + len;
|
||||
const jschar p0 = *pat;
|
||||
const jschar *const p1 = pat + 1;
|
||||
const jschar *const patend = pat + patlen;
|
||||
for (const jschar *t = text; t != textend; ) {
|
||||
if (*t++ != p0)
|
||||
continue;
|
||||
const jschar *ttend = textend;
|
||||
for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
|
||||
while (tt == ttend) {
|
||||
if (++innerp == strs.end())
|
||||
return -1;
|
||||
(*innerp)->getCharsAndEnd(tt, ttend);
|
||||
}
|
||||
if (*pp != *tt)
|
||||
goto break_continue;
|
||||
}
|
||||
|
||||
/* Matched! */
|
||||
return pos + (t - chars) - 1; /* -1 because of *t++ above */
|
||||
|
||||
break_continue:;
|
||||
}
|
||||
|
||||
pos += len;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static JSBool
|
||||
str_indexOf(JSContext *cx, uintN argc, Value *vp)
|
||||
{
|
||||
|
@ -1674,8 +1759,18 @@ class RegExpGuard
|
|||
(patlen > sMaxFlatPatLen || js_ContainsRegExpMetaChars(pat, patlen)))) {
|
||||
return false;
|
||||
}
|
||||
textstr->getCharsAndLength(text, textlen);
|
||||
match = StringMatch(text, textlen, pat, patlen);
|
||||
/*
|
||||
* textstr could be a rope, so we want to avoid flattening it for as
|
||||
* long as possible.
|
||||
*/
|
||||
if (textstr->isTopNode()) {
|
||||
match = RopeMatch(textstr, pat, patlen);
|
||||
} else {
|
||||
const jschar *text;
|
||||
size_t textlen;
|
||||
textstr->getCharsAndLength(text, textlen);
|
||||
match = StringMatch(text, textlen, pat, patlen);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1683,8 +1778,6 @@ class RegExpGuard
|
|||
JSString *patstr;
|
||||
const jschar *pat;
|
||||
size_t patlen;
|
||||
const jschar *text;
|
||||
size_t textlen;
|
||||
jsint match;
|
||||
|
||||
/* If the pattern is not already a regular expression, make it so. */
|
||||
|
@ -2140,22 +2233,71 @@ BuildFlatReplacement(JSContext *cx, JSString *textstr, JSString *repstr,
|
|||
return true;
|
||||
}
|
||||
|
||||
const jschar *rep;
|
||||
size_t replen;
|
||||
repstr->getCharsAndLength(rep, replen);
|
||||
JSRopeBuilder builder(cx);
|
||||
size_t match = g.match; /* Avoid signed/unsigned warnings. */
|
||||
size_t matchEnd = match + g.patlen;
|
||||
|
||||
JSCharBuffer cb(cx);
|
||||
if (!cb.reserve(g.textlen - g.patlen + replen) ||
|
||||
!cb.append(g.text, static_cast<size_t>(g.match)) ||
|
||||
!cb.append(rep, replen) ||
|
||||
!cb.append(g.text + g.match + g.patlen, g.text + g.textlen)) {
|
||||
return false;
|
||||
if (textstr->isTopNode()) {
|
||||
/*
|
||||
* If we are replacing over a rope, avoid flattening it by iterating
|
||||
* through it, building a new rope.
|
||||
*/
|
||||
JSRopeLeafIterator iter(textstr);
|
||||
size_t pos = 0;
|
||||
for (JSString *str = iter.init(); str; str = iter.next()) {
|
||||
size_t len = str->length();
|
||||
size_t strEnd = pos + len;
|
||||
if (pos < matchEnd && strEnd > match) {
|
||||
/*
|
||||
* We need to special-case any part of the rope that overlaps
|
||||
* with the replacement string.
|
||||
*/
|
||||
if (match >= pos) {
|
||||
/*
|
||||
* If this part of the rope overlaps with the left side of
|
||||
* the pattern, then it must be the only one to overlap with
|
||||
* the first character in the pattern, so we include the
|
||||
* replacement string here.
|
||||
*/
|
||||
JSString *leftSide = js_NewDependentString(cx, str, 0, match - pos);
|
||||
if (!leftSide ||
|
||||
!builder.append(cx, leftSide) ||
|
||||
!builder.append(cx, repstr)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If str runs off the end of the matched string, append the
|
||||
* last part of str.
|
||||
*/
|
||||
if (strEnd > matchEnd) {
|
||||
JSString *rightSide = js_NewDependentString(cx, str, matchEnd - pos,
|
||||
strEnd - matchEnd);
|
||||
if (!rightSide || !builder.append(cx, rightSide))
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!builder.append(cx, str))
|
||||
return false;
|
||||
}
|
||||
pos += str->length();
|
||||
}
|
||||
} else {
|
||||
JSString *leftSide = js_NewDependentString(cx, textstr, 0, match);
|
||||
if (!leftSide)
|
||||
return false;
|
||||
JSString *rightSide = js_NewDependentString(cx, textstr, match + g.patlen,
|
||||
textstr->length() - match - g.patlen);
|
||||
if (!rightSide ||
|
||||
!builder.append(cx, leftSide) ||
|
||||
!builder.append(cx, repstr) ||
|
||||
!builder.append(cx, rightSide)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
JSString *str = js_NewStringFromCharBuffer(cx, cb);
|
||||
if (!str)
|
||||
return false;
|
||||
vp->setString(str);
|
||||
vp->setString(builder.getStr());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -67,11 +67,14 @@ enum {
|
|||
extern jschar *
|
||||
js_GetDependentStringChars(JSString *str);
|
||||
|
||||
extern JSString * JS_FASTCALL
|
||||
js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
|
||||
|
||||
JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32);
|
||||
|
||||
struct JSRopeBufferInfo {
|
||||
/* Number of jschars we can hold, not including null terminator. */
|
||||
size_t capacity;
|
||||
size_t capacity;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -421,7 +424,7 @@ struct JSString {
|
|||
}
|
||||
|
||||
inline void ropeIncrementTraversalCount() {
|
||||
JS_ASSERT(isInteriorNode());
|
||||
JS_ASSERT(isRope());
|
||||
mLengthAndFlags += ROPE_TRAVERSAL_COUNT_UNIT;
|
||||
}
|
||||
|
||||
|
@ -555,6 +558,58 @@ class JSRopeNodeIterator {
|
|||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* An iterator that returns the leaves of a rope (which hold the actual string
|
||||
* data) in order. The usage is the same as JSRopeNodeIterator.
|
||||
*/
|
||||
class JSRopeLeafIterator {
|
||||
private:
|
||||
JSRopeNodeIterator mNodeIterator;
|
||||
|
||||
public:
|
||||
|
||||
JSRopeLeafIterator(JSString *topNode) :
|
||||
mNodeIterator(topNode) {
|
||||
JS_ASSERT(topNode->isTopNode());
|
||||
}
|
||||
|
||||
inline JSString *init() {
|
||||
JSString *str = mNodeIterator.init();
|
||||
while (str->isRope()) {
|
||||
str = mNodeIterator.next();
|
||||
JS_ASSERT(str);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
inline JSString *next() {
|
||||
JSString *str;
|
||||
do {
|
||||
str = mNodeIterator.next();
|
||||
} while (str && str->isRope());
|
||||
return str;
|
||||
}
|
||||
};
|
||||
|
||||
class JSRopeBuilder {
|
||||
private:
|
||||
JSString *mStr;
|
||||
|
||||
public:
|
||||
JSRopeBuilder(JSContext *cx);
|
||||
|
||||
inline bool append(JSContext *cx, JSString *str) {
|
||||
mStr = js_ConcatStrings(cx, mStr, str);
|
||||
if (!mStr)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline JSString *getStr() {
|
||||
return mStr;
|
||||
}
|
||||
};
|
||||
|
||||
JS_STATIC_ASSERT(JSString::INTERIOR_NODE & JSString::ROPE_BIT);
|
||||
JS_STATIC_ASSERT(JSString::TOP_NODE & JSString::ROPE_BIT);
|
||||
|
||||
|
@ -566,9 +621,6 @@ JS_STATIC_ASSERT(sizeof(JSString) % JS_GCTHING_ALIGN == 0);
|
|||
extern const jschar *
|
||||
js_GetStringChars(JSContext *cx, JSString *str);
|
||||
|
||||
extern JSString * JS_FASTCALL
|
||||
js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
|
||||
|
||||
extern const jschar *
|
||||
js_UndependString(JSContext *cx, JSString *str);
|
||||
|
||||
|
|
|
@ -72,4 +72,9 @@ JSString::intString(jsint i)
|
|||
return &JSString::intStringTable[u];
|
||||
}
|
||||
|
||||
inline
|
||||
JSRopeBuilder::JSRopeBuilder(JSContext *cx) {
|
||||
mStr = cx->runtime->emptyString;
|
||||
}
|
||||
|
||||
#endif /* jsstrinlines_h___ */
|
||||
|
|
|
@ -80,4 +80,47 @@ new TestCase ( SECTION, "'qwe ert x\t\n 345654AB'.replace(new RegExp('x\\s*\\d+(
|
|||
"qwe ert ****", 'qwe ert x\t\n 345654AB'.replace(new RegExp('x\\s*\\d+(..)$'),'****'));
|
||||
|
||||
|
||||
/*
|
||||
* Test replacement over ropes. The char to rope node ratio must be sufficiently
|
||||
* high for the special-case code to be tested.
|
||||
*/
|
||||
var stringA = "abcdef";
|
||||
var stringB = "ghijk";
|
||||
var stringC = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
|
||||
stringC += stringC;
|
||||
stringC += stringC;
|
||||
stringC[0]; /* flatten stringC */
|
||||
var stringD = "lmn";
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('aa', '')",
|
||||
stringA + stringB + stringC, (stringA + stringB + stringC).replace('aa', ''));
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('abc', 'AA')",
|
||||
"AAdefghijk" + stringC, (stringA + stringB + stringC).replace('abc', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('def', 'AA')",
|
||||
"abcAAghijk" + stringC, (stringA + stringB + stringC).replace('def', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('efg', 'AA')",
|
||||
"abcdAAhijk" + stringC, (stringA + stringB + stringC).replace('efg', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('fgh', 'AA')",
|
||||
"abcdeAAijk" + stringC, (stringA + stringB + stringC).replace('fgh', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringA + stringB + stringC).replace('ghi', 'AA')",
|
||||
"abcdefAAjk" + stringC, (stringA + stringB + stringC).replace('ghi', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringC + stringD).replace('lmn', 'AA')",
|
||||
stringC + "AA", (stringC + stringD).replace('lmn', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringC + stringD).replace('lmno', 'AA')",
|
||||
stringC + stringD, (stringC + stringD).replace('lmno', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringC + stringD).replace('mn', 'AA')",
|
||||
stringC + "lAA", (stringC + stringD).replace('mn', 'AA'));
|
||||
|
||||
new TestCase ( SECTION, "(stringC + stringD).replace('n', 'AA')",
|
||||
stringC + "lmAA", (stringC + stringD).replace('n', 'AA'));
|
||||
|
||||
|
||||
test();
|
||||
|
|
Загрузка…
Ссылка в новой задаче