diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 9d3c83aa6543..8d37aa94f31f 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -1424,6 +1424,91 @@ StringMatch(const jschar *text, jsuint textlen, UnrolledMatch(text, textlen, pat, patlen); } +static const size_t sRopeMatchThresholdRatioLog2 = 5; + +static jsint +RopeMatch(JSString *textstr, const jschar *pat, jsuint patlen) +{ + JS_ASSERT(textstr->isTopNode()); + + if (patlen == 0) + return 0; + if (textstr->length() < patlen) + return -1; + + /* + * List of leaf nodes in the rope. If we run out of memory when trying to + * append to this list, we can still fall back to StringMatch, so use the + * system allocator so we don't report OOM in that case. + */ + Vector strs; + + /* + * We don't want to do rope matching if there is a poor node-to-char ratio, + * since this means spending a lot of time in the match loop below. We also + * need to build the list of leaf nodes. Do both here: iterate over the + * nodes so long as there are not too many. + */ + size_t textstrlen = textstr->length(); + size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2; + JSRopeLeafIterator iter(textstr); + for (JSString *str = iter.init(); str; str = iter.next()) { + if (threshold-- == 0 || !strs.append(str)) + return StringMatch(textstr->chars(), textstrlen, pat, patlen); + } + + /* Absolute offset from the beginning of the logical string textstr. */ + jsint pos = 0; + + // TODO: consider branching to a simple loop if patlen == 1 + + for (JSString **outerp = strs.begin(); outerp != strs.end(); ++outerp) { + /* First try to match without spanning two nodes. */ + const jschar *chars; + size_t len; + (*outerp)->getCharsAndLength(chars, len); + jsint matchResult = StringMatch(chars, len, pat, patlen); + if (matchResult != -1) + return pos + matchResult; + + /* Test the overlap. */ + JSString **innerp = outerp; + + /* + * Start searching at the first place where StringMatch wouldn't have + * found the match. + */ + const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1); + const jschar *const textend = chars + len; + const jschar p0 = *pat; + const jschar *const p1 = pat + 1; + const jschar *const patend = pat + patlen; + for (const jschar *t = text; t != textend; ) { + if (*t++ != p0) + continue; + const jschar *ttend = textend; + for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) { + while (tt == ttend) { + if (++innerp == strs.end()) + return -1; + (*innerp)->getCharsAndEnd(tt, ttend); + } + if (*pp != *tt) + goto break_continue; + } + + /* Matched! */ + return pos + (t - chars) - 1; /* -1 because of *t++ above */ + + break_continue:; + } + + pos += len; + } + + return -1; +} + static JSBool str_indexOf(JSContext *cx, uintN argc, Value *vp) { @@ -1674,8 +1759,18 @@ class RegExpGuard (patlen > sMaxFlatPatLen || js_ContainsRegExpMetaChars(pat, patlen)))) { return false; } - textstr->getCharsAndLength(text, textlen); - match = StringMatch(text, textlen, pat, patlen); + /* + * textstr could be a rope, so we want to avoid flattening it for as + * long as possible. + */ + if (textstr->isTopNode()) { + match = RopeMatch(textstr, pat, patlen); + } else { + const jschar *text; + size_t textlen; + textstr->getCharsAndLength(text, textlen); + match = StringMatch(text, textlen, pat, patlen); + } return true; } @@ -1683,8 +1778,6 @@ class RegExpGuard JSString *patstr; const jschar *pat; size_t patlen; - const jschar *text; - size_t textlen; jsint match; /* If the pattern is not already a regular expression, make it so. */ @@ -2140,22 +2233,71 @@ BuildFlatReplacement(JSContext *cx, JSString *textstr, JSString *repstr, return true; } - const jschar *rep; - size_t replen; - repstr->getCharsAndLength(rep, replen); + JSRopeBuilder builder(cx); + size_t match = g.match; /* Avoid signed/unsigned warnings. */ + size_t matchEnd = match + g.patlen; - JSCharBuffer cb(cx); - if (!cb.reserve(g.textlen - g.patlen + replen) || - !cb.append(g.text, static_cast(g.match)) || - !cb.append(rep, replen) || - !cb.append(g.text + g.match + g.patlen, g.text + g.textlen)) { - return false; + if (textstr->isTopNode()) { + /* + * If we are replacing over a rope, avoid flattening it by iterating + * through it, building a new rope. + */ + JSRopeLeafIterator iter(textstr); + size_t pos = 0; + for (JSString *str = iter.init(); str; str = iter.next()) { + size_t len = str->length(); + size_t strEnd = pos + len; + if (pos < matchEnd && strEnd > match) { + /* + * We need to special-case any part of the rope that overlaps + * with the replacement string. + */ + if (match >= pos) { + /* + * If this part of the rope overlaps with the left side of + * the pattern, then it must be the only one to overlap with + * the first character in the pattern, so we include the + * replacement string here. + */ + JSString *leftSide = js_NewDependentString(cx, str, 0, match - pos); + if (!leftSide || + !builder.append(cx, leftSide) || + !builder.append(cx, repstr)) { + return false; + } + } + + /* + * If str runs off the end of the matched string, append the + * last part of str. + */ + if (strEnd > matchEnd) { + JSString *rightSide = js_NewDependentString(cx, str, matchEnd - pos, + strEnd - matchEnd); + if (!rightSide || !builder.append(cx, rightSide)) + return false; + } + } else { + if (!builder.append(cx, str)) + return false; + } + pos += str->length(); + } + } else { + JSString *leftSide = js_NewDependentString(cx, textstr, 0, match); + if (!leftSide) + return false; + JSString *rightSide = js_NewDependentString(cx, textstr, match + g.patlen, + textstr->length() - match - g.patlen); + if (!rightSide || + !builder.append(cx, leftSide) || + !builder.append(cx, repstr) || + !builder.append(cx, rightSide)) { + return false; + } } - JSString *str = js_NewStringFromCharBuffer(cx, cb); - if (!str) - return false; - vp->setString(str); + vp->setString(builder.getStr()); return true; } diff --git a/js/src/jsstr.h b/js/src/jsstr.h index abc5930ea026..254b98dda926 100644 --- a/js/src/jsstr.h +++ b/js/src/jsstr.h @@ -67,11 +67,14 @@ enum { extern jschar * js_GetDependentStringChars(JSString *str); +extern JSString * JS_FASTCALL +js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); + JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32); struct JSRopeBufferInfo { /* Number of jschars we can hold, not including null terminator. */ - size_t capacity; + size_t capacity; }; /* @@ -421,7 +424,7 @@ struct JSString { } inline void ropeIncrementTraversalCount() { - JS_ASSERT(isInteriorNode()); + JS_ASSERT(isRope()); mLengthAndFlags += ROPE_TRAVERSAL_COUNT_UNIT; } @@ -555,6 +558,58 @@ class JSRopeNodeIterator { } }; +/* + * An iterator that returns the leaves of a rope (which hold the actual string + * data) in order. The usage is the same as JSRopeNodeIterator. + */ +class JSRopeLeafIterator { + private: + JSRopeNodeIterator mNodeIterator; + + public: + + JSRopeLeafIterator(JSString *topNode) : + mNodeIterator(topNode) { + JS_ASSERT(topNode->isTopNode()); + } + + inline JSString *init() { + JSString *str = mNodeIterator.init(); + while (str->isRope()) { + str = mNodeIterator.next(); + JS_ASSERT(str); + } + return str; + } + + inline JSString *next() { + JSString *str; + do { + str = mNodeIterator.next(); + } while (str && str->isRope()); + return str; + } +}; + +class JSRopeBuilder { + private: + JSString *mStr; + + public: + JSRopeBuilder(JSContext *cx); + + inline bool append(JSContext *cx, JSString *str) { + mStr = js_ConcatStrings(cx, mStr, str); + if (!mStr) + return false; + return true; + } + + inline JSString *getStr() { + return mStr; + } +}; + JS_STATIC_ASSERT(JSString::INTERIOR_NODE & JSString::ROPE_BIT); JS_STATIC_ASSERT(JSString::TOP_NODE & JSString::ROPE_BIT); @@ -566,9 +621,6 @@ JS_STATIC_ASSERT(sizeof(JSString) % JS_GCTHING_ALIGN == 0); extern const jschar * js_GetStringChars(JSContext *cx, JSString *str); -extern JSString * JS_FASTCALL -js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); - extern const jschar * js_UndependString(JSContext *cx, JSString *str); diff --git a/js/src/jsstrinlines.h b/js/src/jsstrinlines.h index d1b122a5f2e2..0d157dc0befa 100644 --- a/js/src/jsstrinlines.h +++ b/js/src/jsstrinlines.h @@ -72,4 +72,9 @@ JSString::intString(jsint i) return &JSString::intStringTable[u]; } +inline +JSRopeBuilder::JSRopeBuilder(JSContext *cx) { + mStr = cx->runtime->emptyString; +} + #endif /* jsstrinlines_h___ */ diff --git a/js/src/tests/js1_2/regexp/string_replace.js b/js/src/tests/js1_2/regexp/string_replace.js index 633f11719df0..5d5077b1953e 100644 --- a/js/src/tests/js1_2/regexp/string_replace.js +++ b/js/src/tests/js1_2/regexp/string_replace.js @@ -80,4 +80,47 @@ new TestCase ( SECTION, "'qwe ert x\t\n 345654AB'.replace(new RegExp('x\\s*\\d+( "qwe ert ****", 'qwe ert x\t\n 345654AB'.replace(new RegExp('x\\s*\\d+(..)$'),'****')); +/* + * Test replacement over ropes. The char to rope node ratio must be sufficiently + * high for the special-case code to be tested. + */ +var stringA = "abcdef"; +var stringB = "ghijk"; +var stringC = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"; +stringC += stringC; +stringC += stringC; +stringC[0]; /* flatten stringC */ +var stringD = "lmn"; + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('aa', '')", + stringA + stringB + stringC, (stringA + stringB + stringC).replace('aa', '')); + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('abc', 'AA')", + "AAdefghijk" + stringC, (stringA + stringB + stringC).replace('abc', 'AA')); + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('def', 'AA')", + "abcAAghijk" + stringC, (stringA + stringB + stringC).replace('def', 'AA')); + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('efg', 'AA')", + "abcdAAhijk" + stringC, (stringA + stringB + stringC).replace('efg', 'AA')); + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('fgh', 'AA')", + "abcdeAAijk" + stringC, (stringA + stringB + stringC).replace('fgh', 'AA')); + +new TestCase ( SECTION, "(stringA + stringB + stringC).replace('ghi', 'AA')", + "abcdefAAjk" + stringC, (stringA + stringB + stringC).replace('ghi', 'AA')); + +new TestCase ( SECTION, "(stringC + stringD).replace('lmn', 'AA')", + stringC + "AA", (stringC + stringD).replace('lmn', 'AA')); + +new TestCase ( SECTION, "(stringC + stringD).replace('lmno', 'AA')", + stringC + stringD, (stringC + stringD).replace('lmno', 'AA')); + +new TestCase ( SECTION, "(stringC + stringD).replace('mn', 'AA')", + stringC + "lAA", (stringC + stringD).replace('mn', 'AA')); + +new TestCase ( SECTION, "(stringC + stringD).replace('n', 'AA')", + stringC + "lmAA", (stringC + stringD).replace('n', 'AA')); + + test();