bug 682592: do bidi scan when nsTextFragment content is changed

2011-09-09 12:27:00 -04:00 · 2011-09-09 12:27:00 -04:00 · 4b460d74be
--- a/content/base/src/nsGenericDOMDataNode.cpp
+++ b/content/base/src/nsGenericDOMDataNode.cpp
@ -328,12 +328,13 @@ nsGenericDOMDataNode::SetTextInternal(PRUint32 aOffset, PRUint32 aCount,
  }

  if (aOffset == 0 && endOffset == textLength) {
-    // Replacing whole text or old text was empty
-    mText.SetTo(aBuffer, aLength);
+    // Replacing whole text or old text was empty.  Don't bother to check for
+    // bidi in this string if the document already has bidi enabled.
+    mText.SetTo(aBuffer, aLength, !document || !document->GetBidiEnabled());
  }
  else if (aOffset == textLength) {
    // Appending to existing
-    mText.Append(aBuffer, aLength);
+    mText.Append(aBuffer, aLength, !document || !document->GetBidiEnabled());
  }
  else {
    // Merging old and new
@ -355,12 +356,16 @@ nsGenericDOMDataNode::SetTextInternal(PRUint32 aOffset, PRUint32 aCount,
    }

    // XXX Add OOM checking to this
-    mText.SetTo(to, newLength);
+    mText.SetTo(to, newLength, !document || !document->GetBidiEnabled());

    delete [] to;
  }

-  UpdateBidiStatus(aBuffer, aLength);
+  if (document && mText.IsBidi()) {
+    // If mText.SetTo() or mText.Append() above found bidi characters, indicate
+    // that the document contains bidi characters.
+    document->SetBidiEnabled();
+  }

  // Notify observers
  if (aNotify) {
@ -971,21 +976,6 @@ nsGenericDOMDataNode::AppendTextTo(nsAString& aResult)
  mText.AppendTo(aResult);
 }

-void nsGenericDOMDataNode::UpdateBidiStatus(const PRUnichar* aBuffer, PRUint32 aLength)
-{
-  nsIDocument *document = GetCurrentDoc();
-  if (document && document->GetBidiEnabled()) {
-    // OK, we already know it's Bidi, so we won't test again
-    return;
-  }
-
-  mText.UpdateBidiFlag(aBuffer, aLength);
-
-  if (document && mText.IsBidi()) {
-    document->SetBidiEnabled();
-  }
-}
-
 already_AddRefed<nsIAtom>
 nsGenericDOMDataNode::GetCurrentValueAtom()
 {
--- a/content/base/src/nsGenericDOMDataNode.h
+++ b/content/base/src/nsGenericDOMDataNode.h
@ -364,8 +364,6 @@ protected:
  nsTextFragment mText;

 private:
-  void UpdateBidiStatus(const PRUnichar* aBuffer, PRUint32 aLength);
-
  already_AddRefed<nsIAtom> GetCurrentValueAtom();
 };

--- a/content/base/src/nsTextFragment.cpp
+++ b/content/base/src/nsTextFragment.cpp
@ -117,6 +117,7 @@ nsTextFragment::ReleaseText()
  }

  m1b = nsnull;
+  mState.mIsBidi = PR_FALSE;

  // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
  mAllBits = 0;
@ -145,8 +146,8 @@ nsTextFragment::operator=(const nsTextFragment& aOther)
  return *this;
 }

-static inline PRBool
-Is8BitUnvectorized(const PRUnichar *str, const PRUnichar *end)
+static inline PRInt32
+FirstNon8BitUnvectorized(const PRUnichar *str, const PRUnichar *end)
 {
 #if PR_BYTES_PER_WORD == 4
  const size_t mask = 0xff00ff00;
@ -168,7 +169,7 @@ Is8BitUnvectorized(const PRUnichar *str, const PRUnichar *end)
    NS_MIN(len, PRInt32(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(PRUnichar)));
  for (; i < alignLen; i++) {
    if (str[i] > 255)
-      return PR_FALSE;
+      return i;
  }

  // Check one word at a time.
@ -176,40 +177,47 @@ Is8BitUnvectorized(const PRUnichar *str, const PRUnichar *end)
  for (; i < wordWalkEnd; i += numUnicharsPerWord) {
    const size_t word = *reinterpret_cast<const size_t*>(str + i);
    if (word & mask)
-      return PR_FALSE;
+      return i;
  }

  // Take care of the remainder one character at a time.
  for (; i < len; i++) {
    if (str[i] > 255)
-      return PR_FALSE;
+      return i;
  }

-  return PR_TRUE;
+  return -1;
 }

 #ifdef MOZILLA_MAY_SUPPORT_SSE2
 namespace mozilla {
  namespace SSE2 {
-    PRBool Is8Bit(const PRUnichar *str, const PRUnichar *end);
+    PRInt32 FirstNon8Bit(const PRUnichar *str, const PRUnichar *end);
  }
 }
 #endif

-static inline PRBool
-Is8Bit(const PRUnichar *str, const PRUnichar *end)
+/*
+ * This function returns -1 if all characters in str are 8-bit characters.
+ * Otherwise, it returns a value less than or equal to the index of the first
+ * non-8bit character in str. For example, if the first non-8bit character is
+ * at position 25, it may return 25, 24, or 16. Either way, it guarantees that
+ * there is no non-8bit character before the returned index.
+ */
+static inline PRInt32
+FirstNon8Bit(const PRUnichar *str, const PRUnichar *end)
 {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
  if (mozilla::supports_sse2()) {
-    return mozilla::SSE2::Is8Bit(str, end);
+    return mozilla::SSE2::FirstNon8Bit(str, end);
  }
 #endif

-  return Is8BitUnvectorized(str, end);
+  return FirstNon8BitUnvectorized(str, end);
 }

 void
-nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength)
+nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength, PRBool aUpdateBidi)
 {
  ReleaseText();

@ -268,15 +276,21 @@ nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength)
  }

  // See if we need to store the data in ucs2 or not
-  PRBool need2 = !Is8Bit(ucp, uend);
+  PRInt32 first16bit = FirstNon8Bit(ucp, uend);

-  if (need2) {
+  if (first16bit != -1) { // aBuffer contains at least one non-8bit character
    // Use ucs2 storage because we have to
    m2b = (PRUnichar *)nsMemory::Clone(aBuffer,
                                       aLength * sizeof(PRUnichar));
    if (!m2b) {
      return;
    }
+
+    mState.mIs2b = PR_TRUE;
+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
+    }
+
  } else {
    // Use 1 byte storage because we can
    char* buff = (char *)nsMemory::Alloc(aLength * sizeof(char));
@ -288,11 +302,11 @@ nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength)
    LossyConvertEncoding16to8 converter(buff);
    copy_string(aBuffer, aBuffer+aLength, converter);
    m1b = buff;
+    mState.mIs2b = PR_FALSE;
  }

  // Setup our fields
  mState.mInHeap = PR_TRUE;
-  mState.mIs2b = need2;
  mState.mLength = aLength;
 }

@ -323,12 +337,12 @@ nsTextFragment::CopyTo(PRUnichar *aDest, PRInt32 aOffset, PRInt32 aCount)
 }

 void
-nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
+nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength, PRBool aUpdateBidi)
 {
  // This is a common case because some callsites create a textnode
  // with a value by creating the node and then calling AppendData.
  if (mState.mLength == 0) {
-    SetTo(aBuffer, aLength);
+    SetTo(aBuffer, aLength, aUpdateBidi);

    return;
  }
@ -341,17 +355,22 @@ nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
    if (!buff) {
      return;
    }
-    
+
    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
    mState.mLength += aLength;
    m2b = buff;

+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer, aLength);
+    }
+
    return;
  }

  // Current string is a 1-byte string, check if the new data fits in one byte too.
+  PRInt32 first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);

-  if (!Is8Bit(aBuffer, aBuffer + aLength)) {
+  if (first16bit != -1) { // aBuffer contains at least one non-8bit character
    // The old data was 1-byte, but the new is not so we have to expand it
    // all to 2-byte
    PRUnichar* buff = (PRUnichar*)nsMemory::Alloc((mState.mLength + aLength) *
@ -365,7 +384,6 @@ nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
    copy_string(m1b, m1b+mState.mLength, converter);

    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
-
    mState.mLength += aLength;
    mState.mIs2b = PR_TRUE;

@ -376,6 +394,10 @@ nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)

    mState.mInHeap = PR_TRUE;

+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
+    }
+
    return;
  }

--- a/content/base/src/nsTextFragment.h
+++ b/content/base/src/nsTextFragment.h
@ -1,4 +1,3 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
@ -114,8 +113,8 @@ public:

  /**
   * Return PR_TRUE if this fragment contains Bidi text
-   * For performance reasons this flag is not set automatically, but
-   * requires an explicit call to UpdateBidiFlag()
+   * For performance reasons this flag is only set if explicitly requested (by
+   * setting the aUpdateBidi argument on SetTo or Append to true).
   */
  PRBool IsBidi() const
  {
@ -156,14 +155,17 @@ public:

  /**
   * Change the contents of this fragment to be a copy of the given
-   * buffer.
+   * buffer. If aUpdateBidi is true, contents of the fragment will be scanned,
+   * and mState.mIsBidi will be turned on if it includes any Bidi characters.
   */
-  void SetTo(const PRUnichar* aBuffer, PRInt32 aLength);
+  void SetTo(const PRUnichar* aBuffer, PRInt32 aLength, PRBool aUpdateBidi);

  /**
-   * Append aData to the end of this fragment.
+   * Append aBuffer to the end of this fragment. If aUpdateBidi is true, the
+   * appended data will be scanned, and mState.mIsBidi will be turned on if
+   * it includes any Bidi characters.
   */
-  void Append(const PRUnichar* aBuffer, PRUint32 aLength);
+  void Append(const PRUnichar* aBuffer, PRUint32 aLength, PRBool aUpdateBidi);

  /**
   * Append the contents of this string fragment to aString
@ -208,12 +210,6 @@ public:
    return mState.mIs2b ? m2b[aIndex] : static_cast<unsigned char>(m1b[aIndex]);
  }

-  /**
-   * Scan the contents of the fragment and turn on mState.mIsBidi if it
-   * includes any Bidi characters.
-   */
-  void UpdateBidiFlag(const PRUnichar* aBuffer, PRUint32 aLength);
-
  struct FragmentBits {
    // PRUint32 to ensure that the values are unsigned, because we
    // want 0/1, not 0/-1!
@ -240,6 +236,12 @@ public:
 private:
  void ReleaseText();

+  /**
+   * Scan the contents of the fragment and turn on mState.mIsBidi if it
+   * includes any Bidi characters.
+   */
+  void UpdateBidiFlag(const PRUnichar* aBuffer, PRUint32 aLength);
+ 
  union {
    PRUnichar *m2b;
    const char *m1b; // This is const since it can point to shared data
--- a/content/base/src/nsTextFragmentSSE2.cpp
+++ b/content/base/src/nsTextFragmentSSE2.cpp
@ -15,8 +15,8 @@ is_zero (__m128i x)
    _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
 }

-PRBool
-Is8Bit(const PRUnichar *str, const PRUnichar *end)
+PRInt32
+FirstNon8Bit(const PRUnichar *str, const PRUnichar *end)
 {
  const PRUint32 numUnicharsPerVector = 8;

@ -39,7 +39,7 @@ Is8Bit(const PRUnichar *str, const PRUnichar *end)
    NS_MIN(len, PRInt32(((-NS_PTR_TO_INT32(str)) & 0xf) / sizeof(PRUnichar)));
  for (; i < alignLen; i++) {
    if (str[i] > 255)
-      return PR_FALSE;
+      return i;
  }

  // Check one XMM register (16 bytes) at a time.
@ -48,7 +48,7 @@ Is8Bit(const PRUnichar *str, const PRUnichar *end)
  for(; i < vectWalkEnd; i += numUnicharsPerVector) {
    const __m128i vect = *reinterpret_cast<const __m128i*>(str + i);
    if (!is_zero(_mm_and_si128(vect, vectmask)))
-      return PR_FALSE;
+      return i;
  }

  // Check one word at a time.
@ -56,17 +56,17 @@ Is8Bit(const PRUnichar *str, const PRUnichar *end)
  for(; i < wordWalkEnd; i += numUnicharsPerWord) {
    const size_t word = *reinterpret_cast<const size_t*>(str + i);
    if (word & mask)
-      return PR_FALSE;
+      return i;
  }

  // Take care of the remainder one character at a time.
  for (; i < len; i++) {
    if (str[i] > 255) {
-      return PR_FALSE;
+      return i;
    }
  }

-  return PR_TRUE;
+  return -1;
 }

 } // namespace SSE2
--- a/content/base/test/Makefile.in
+++ b/content/base/test/Makefile.in
@ -506,6 +506,9 @@ _TEST_FILES2 = \
 		test_bug675121.html \
 		file_bug675121.sjs \
 		test_bug654352.html \
+		test_bug682592.html \
+		bug682592-subframe.html \
+		bug682592-subframe-ref.html \
 		$(NULL)

 _CHROME_FILES =	\
--- a/content/base/test/bug682592-subframe-ref.html
+++ b/content/base/test/bug682592-subframe-ref.html
@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >
+    <title></title>
+</head>
+<body>
+<p id="content"></p>
+</body>
+</html>
+
--- a/content/base/test/bug682592-subframe.html
+++ b/content/base/test/bug682592-subframe.html
@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >
+    <title></title>
+</head>
+<body>
+<p id="content"></p>
+</body>
+</html>
--- a/content/base/test/test_bug682592.html
+++ b/content/base/test/test_bug682592.html
@ -0,0 +1,170 @@
+<!DOCTYPE html>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=682592
+-->
+<head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >
+    <title>Test for bug 682592</title>
+    <script type="text/javascript" src="/tests/SimpleTest/WindowSnapshot.js"></script>
+    <script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+    <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body>
+<p id="display"></p>
+<div id="content">
+<iframe id="iframe-ref" src="bug682592-subframe-ref.html"></iframe>
+<iframe id="iframe-test"></iframe>
+</div>
+<pre id="test">
+<script class="testbody" type="text/javascript;version=1.7">
+/** Test for Bug 682592 **/
+
+/* 
+   We want to check that bidi is detected correctly. So, we have a reference
+   document where the direction is set explicitly with a <bdo> element. Then,
+   we compare it with the test document.
+
+   In Mozilla, once bidi has been detected in a document, the document always
+   considers itself to be in bidi mode. So, if one fragment enables bidi
+   correctly and we then create or update another fragment in the same
+   document, a failure of that operation to enable bidi by itself would go
+   undetected. So, we need a new document for each test.
+
+   So, instead of many different reftests, this mochitest implements a
+   reftest-like harness. It creates reference text fragments in the reference
+   iframe and test text fragments in the test iframe, compares the documents,
+   and reloads the test iframe. The reference iframe does not need to be
+   reloaded between tests: keeping bidi always enabled there is OK (maybe desired).
+*/
+
+SimpleTest.waitForExplicitFinish();
+var refFrame = document.getElementById("iframe-ref")
+var testFrame = document.getElementById("iframe-test");
+
+refFrame.addEventListener("load", function() {
+  testFrame.addEventListener("load", function() {
+    try {
+      tests.next();
+      ok(compareSnapshots(snapshotWindow(testFrame.contentWindow), 
+                          snapshotWindow(refFrame.contentWindow), true)[0], 
+         "bidi is not detected correctly");
+
+      testFrame.contentWindow.location.reload();
+    } catch (err if err instanceof StopIteration) {
+      SimpleTest.finish();
+    }
+  }, false);
+  testFrame.src = "bug682592-subframe.html"
+}, false);
+
+var rtl = "עִבְרִית";
+var non8bit =  "ʃ";
+var is8bit = "a";
+
+// Returns aStr concatenated aNumber times ("" when aNumber is 0).
+function strMult(aStr, aNumber) {
+  if (aNumber === 0) {
+    return "";
+  }
+  return strMult(aStr, aNumber - 1) + aStr;
+}
+
+function runTests () {  // generator: sets up one reference/test pair per yield
+  var ltr = "", prefix = null;
+  var refContainer = refFrame.contentDocument.getElementById('content');
+  var testContainer, textNode;
+  var i = 0;
+
+  // innerHTML: i 8bit chars followed by the rtl text
+  for (i = 0; i <= 16; i++) {  // 0..16 leading chars; presumably to vary where the scan first meets a non-8bit char
+    ltr = strMult(is8bit, i);
+    refContainer.innerHTML = ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    testContainer.innerHTML = ltr + rtl;
+    yield;  // pause; the test iframe's load handler compares snapshots and reloads
+  }
+
+  // innerHTML: one non-8bit char + i 8bit chars + the rtl text
+  for (i = 0; i <= 16; i++) {
+    ltr = non8bit + strMult(is8bit, i);
+    refContainer.innerHTML = ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    testContainer.innerHTML = ltr + rtl;
+    yield;
+  }
+
+  // appendData onto an empty text node: i 8bit chars + the rtl text
+  for (i = 0; i <= 16; i++) {
+    ltr = strMult(is8bit, i);
+    refContainer.innerHTML = ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode("");
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+
+  for (i = 0; i <= 16; i++) {  // same, but the appended data starts with a non-8bit char
+    ltr = non8bit + strMult(is8bit, i);
+    refContainer.innerHTML = ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode("");
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+
+  // appendData onto a text node that already holds one 8bit char
+  for (i = 0; i <= 16; i++) {
+    prefix = is8bit;
+    ltr = strMult(is8bit, i);
+    refContainer.innerHTML = prefix + ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode(prefix);
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+
+  for (i = 0; i <= 16; i++) {  // same, but the appended data starts with a non-8bit char
+    prefix = is8bit;
+    ltr = non8bit + strMult(is8bit, i);
+    refContainer.innerHTML = prefix + ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode(prefix);
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+
+  // appendData onto a text node that already holds one non-8bit char
+  for (i = 0; i <= 16; i++) {
+    prefix = non8bit;
+    ltr = strMult(is8bit, i);
+    refContainer.innerHTML = prefix + ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode(prefix);
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+
+  for (i = 0; i <= 16; i++) {  // same, but the appended data starts with a non-8bit char
+    prefix = non8bit;
+    ltr = non8bit + strMult(is8bit, i);
+    refContainer.innerHTML = prefix + ltr + '<bdo dir="rtl">' + rtl + '</bdo>';
+    testContainer = testFrame.contentDocument.getElementById('content');
+    textNode = document.createTextNode(prefix);
+    testContainer.appendChild(textNode);
+    textNode.appendData(ltr + rtl);
+    yield;
+  }
+};
+
+var tests = runTests();
+
+</script>
+</pre>
+</body>
+</html>