From a5cac4d5973db8acefe43040991c505c08d5b5d9 Mon Sep 17 00:00:00 2001
From: Mihai Alexandru Michis <malexandru@mozilla.com>
Date: Thu, 24 Oct 2019 19:41:56 +0300
Subject: [PATCH] Backed out changeset 0063a5b2f5b3 (bug 1589786) for causing
 bustages in TestDLLBlocklist.obj and sandboxBroker.i_o CLOSED TREE

---
 toolkit/components/find/nsFind.cpp            | 81 +++++--------------
 toolkit/components/find/nsFind.h              | 12 +--
 .../windowcreator/test/test_nsFind.html       |  6 --
 xpcom/string/nsCharTraits.h                   |  3 -
 4 files changed, 20 insertions(+), 82 deletions(-)

diff --git a/toolkit/components/find/nsFind.cpp b/toolkit/components/find/nsFind.cpp
index 7a3e4f8fd00f..769519544aa2 100644
--- a/toolkit/components/find/nsFind.cpp
+++ b/toolkit/components/find/nsFind.cpp
@@ -415,45 +415,7 @@ nsFind::SetEntireWord(bool aEntireWord) {
 // are intermixed in the dom. We don't have string classes which can deal with
 // intermixed strings, so all the handling is done explicitly here.
 
-char32_t nsFind::DecodeChar(const char16_t* t2b, int32_t* index) const {
-  char32_t c = t2b[*index];
-  if (mFindBackward) {
-    if (*index >= 1 && IS_SURROGATE_PAIR(t2b[*index - 1], t2b[*index])) {
-      c = SURROGATE_TO_UCS4(t2b[*index - 1], t2b[*index]);
-      (*index)--;
-    }
-  } else {
-    if (IS_SURROGATE_PAIR(t2b[*index], t2b[*index + 1])) {
-      c = SURROGATE_TO_UCS4(t2b[*index], t2b[*index + 1]);
-      (*index)++;
-    }
-  }
-  return c;
-}
-
-bool nsFind::BreakInBetween(char32_t x, char32_t y) const {
-  char16_t x16[2], y16[2];
-  int32_t x16len, y16len;
-  if (IS_IN_BMP(x)) {
-    x16[0] = (char16_t)x;
-    x16len = 1;
-  } else {
-    x16[0] = H_SURROGATE(x);
-    x16[1] = L_SURROGATE(x);
-    x16len = 2;
-  }
-  if (IS_IN_BMP(y)) {
-    y16[0] = (char16_t)y;
-    y16len = 1;
-  } else {
-    y16[0] = H_SURROGATE(y);
-    y16[1] = L_SURROGATE(y);
-    y16len = 2;
-  }
-  return mWordBreaker->BreakInBetween(x16, x16len, y16, y16len);
-}
-
-char32_t nsFind::PeekNextChar(State& aState) const {
+char16_t nsFind::PeekNextChar(State& aState) const {
   // We need to restore the necessary state before this function returns.
   StateRestorer restorer(aState);
 
@@ -475,7 +437,7 @@ char32_t nsFind::PeekNextChar(State& aState) const {
   MOZ_ASSERT(len);
 
   int32_t index = mFindBackward ? len - 1 : 0;
-  return t1b ? CHAR_TO_UNICHAR(t1b[index]) : DecodeChar(t2b, &index);
+  return t1b ? CHAR_TO_UNICHAR(t1b[index]) : t2b[index];
 }
 
 #define NBSP_CHARCODE (CHAR_TO_UNICHAR(160))
@@ -556,10 +518,10 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
   nsINode* endNode = aEndPoint->GetEndContainer();
   uint32_t endOffset = aEndPoint->EndOffset();
 
-  char32_t c = 0;
-  char32_t patc = 0;
-  char32_t prevChar = 0;
-  char32_t prevCharInMatch = 0;
+  char16_t c = 0;
+  char16_t patc = 0;
+  char16_t prevChar = 0;
+  char16_t prevCharInMatch = 0;
 
   State state(mFindBackward, *root, *aStartPoint);
   Text* current = nullptr;
@@ -665,13 +627,12 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
     // Save the previous character for word boundary detection
     prevChar = c;
     // The two characters we'll be comparing:
-    c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex]));
-    patc = DecodeChar(patStr, &pindex);
+    c = (t2b ? t2b[findex] : CHAR_TO_UNICHAR(t1b[findex]));
+    patc = patStr[pindex];
 
-    DEBUG_FIND_PRINTF(
-        "Comparing '%c'=%#x to '%c'=%#x (%d of %d), findex=%d%s\n", (char)c,
-        (int)c, (char)patc, (int)patc, pindex, patLen, findex,
-        inWhitespace ? " (inWhitespace)" : "");
+    DEBUG_FIND_PRINTF("Comparing '%c'=%x to '%c' (%d of %d), findex=%d%s\n",
+                      (char)c, (int)c, patc, pindex, patLen, findex,
+                      inWhitespace ? " (inWhitespace)" : "");
 
     // Do we need to go back to non-whitespace mode? If inWhitespace, then this
     // space in the pat str has already matched at least one space in the
@@ -687,7 +648,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
         NS_ASSERTION(false, "Missed a whitespace match");
       }
 #endif
-      patc = DecodeChar(patStr, &pindex);
+      patc = patStr[pindex];
     }
     if (!inWhitespace && IsSpace(patc)) {
       inWhitespace = true;
@@ -741,7 +702,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
     wordBreakPrev = false;
     if (mWordBreaker) {
       if (prevChar == NBSP_CHARCODE) prevChar = CHAR_TO_UNICHAR(' ');
-      wordBreakPrev = BreakInBetween(prevChar, c);
+      wordBreakPrev = mWordBreaker->BreakInBetween(&prevChar, 1, &c, 1);
     }
 
     // Compare. Match if we're in whitespace and c is whitespace, or if the
@@ -764,7 +725,6 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
       if (!matchAnchorNode) {
         matchAnchorNode = state.GetCurrentNode();
         matchAnchorOffset = findex;
-        if (!IS_IN_BMP(c)) matchAnchorOffset -= incr;
       }
 
       // Are we done?
@@ -779,20 +739,17 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
 
           char16_t nextChar;
           // If still in array boundaries, get nextChar.
-          if (mFindBackward ? (nextfindex >= 0) : (nextfindex < fragLen)) {
-            if (t2b)
-              nextChar = DecodeChar(t2b, &nextfindex);
-            else
-              nextChar = CHAR_TO_UNICHAR(t1b[nextfindex]);
-          } else {
-            // Get next character from the next node.
+          if (mFindBackward ? (nextfindex >= 0) : (nextfindex < fragLen))
+            nextChar =
+                (t2b ? t2b[nextfindex] : CHAR_TO_UNICHAR(t1b[nextfindex]));
+          // Get next character from the next node.
+          else
             nextChar = PeekNextChar(state);
-          }
 
           if (nextChar == NBSP_CHARCODE) nextChar = CHAR_TO_UNICHAR(' ');
 
           // If a word break isn't there when it needs to be, reset search.
-          if (!BreakInBetween(c, nextChar)) {
+          if (!mWordBreaker->BreakInBetween(&c, 1, &nextChar, 1)) {
             matchAnchorNode = nullptr;
             continue;
           }
diff --git a/toolkit/components/find/nsFind.h b/toolkit/components/find/nsFind.h
index 76cc4537d717..d926f89a7e59 100644
--- a/toolkit/components/find/nsFind.h
+++ b/toolkit/components/find/nsFind.h
@@ -50,20 +50,10 @@ class nsFind : public nsIFind {
   struct State;
   class StateRestorer;
 
-  // Extract a character from a string, handling surrogate pairs and
-  // incrementing the index if a surrogate pair is encountered
-  char32_t DecodeChar(const char16_t* t2b, int32_t* index) const;
-
-  // Determine if a line break can occur between two characters
-  //
-  // This could be improved because some languages require more context than two
-  // characters to determine where line breaks can occur
-  bool BreakInBetween(char32_t x, char32_t y) const;
-
   // Get the first character from the next node (last if mFindBackward).
   //
   // This will mutate the state, but then restore it afterwards.
-  char32_t PeekNextChar(State&) const;
+  char16_t PeekNextChar(State&) const;
 };
 
 #endif  // nsFind_h__
diff --git a/toolkit/components/windowcreator/test/test_nsFind.html b/toolkit/components/windowcreator/test/test_nsFind.html
index 660164002144..a0dea3c7920a 100644
--- a/toolkit/components/windowcreator/test/test_nsFind.html
+++ b/toolkit/components/windowcreator/test/test_nsFind.html
@@ -2,7 +2,6 @@
 <html>
 <!--
 https://bugzilla.mozilla.org/show_bug.cgi?id=450048
-https://bugzilla.mozilla.org/show_bug.cgi?id=1589786
 -->
 <head>
   <meta charset="UTF-8">
@@ -51,10 +50,6 @@ async function runTests() {
   retRange = rf.Find(searchValue, searchRange, startPt, endPt);
   ok(retRange, "\"" + searchValue + "\" not found (not caseSensitive)");
 
-  searchValue = "𐐸𐐯𐑊𐐬";
-  retRange = rf.Find(searchValue, searchRange, startPt, endPt);
-  ok(retRange, "\"" + searchValue + "\" not found (not caseSensitive)");
-
   rf.caseSensitive = true;
 
   // searchValue = "TexT";
@@ -257,7 +252,6 @@ async function runTests() {
 <p id="quotes">"straight" and &ldquo;curly&rdquo; and &lsquo;didn't&rsquo; and 'doesn&rsquo;t'</p>
 <p id="nullcharsnative">native null&#0;</p>
 <p id="nullcharsinjected"></p>
-<p id="deseret">𐐐𐐯𐑊𐐬 𐐶𐐯𐑉𐑊𐐼!</p>
 <div id="content" style="display: none">
 
 </div>
diff --git a/xpcom/string/nsCharTraits.h b/xpcom/string/nsCharTraits.h
index b6f3cc368481..88d242ea12bc 100644
--- a/xpcom/string/nsCharTraits.h
+++ b/xpcom/string/nsCharTraits.h
@@ -50,9 +50,6 @@
 #define NS_IS_LOW_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xDC00)
 // Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE
 #define IS_SURROGATE(u) ((uint32_t(u) & 0xFFFFF800) == 0xD800)
-// Easier to type than NS_IS_HIGH_SURROGATE && NS_IS_LOW_SURROGATE
-#define IS_SURROGATE_PAIR(h, l) \
-  (NS_IS_HIGH_SURROGATE(h) && NS_IS_LOW_SURROGATE(l))
 
 // Everything else is not a surrogate: 0x000 -- 0xD7FF, 0xE000 -- 0xFFFF