Bug 1776013 - Consume mfbt SIMD for String builtins r=iain

Differential Revision: https://phabricator.services.mozilla.com/D150068
2022-07-14 00:39:23 +00:00 · 2022-07-14 00:39:23 +00:00 · 758dbfe527
--- a/js/src/builtin/String.cpp
+++ b/js/src/builtin/String.cpp
@ -14,6 +14,7 @@
 #endif
 #include "mozilla/PodOperations.h"
 #include "mozilla/Range.h"
+#include "mozilla/SIMD.h"
 #include "mozilla/TextUtils.h"

 #include <algorithm>
@ -76,6 +77,7 @@ using mozilla::IsAsciiHexDigit;
 using mozilla::IsNaN;
 using mozilla::PodCopy;
 using mozilla::RangedPtr;
+using mozilla::SIMD;

 using JS::AutoCheckCannotGC;
 using JS::AutoStableStringChars;
@ -1718,7 +1720,7 @@ struct MemCmp {
  using Extent = uint32_t;
  static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
                                                uint32_t patLen) {
-    return (patLen - 1) * sizeof(PatChar);
+    return (patLen - 2) * sizeof(PatChar);
  }
  static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
                                      Extent extent) {
@ -1745,78 +1747,35 @@ struct ManualCmp {
  }
 };

-template <typename TextChar, typename PatChar>
-static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
-                                                uint32_t n, const PatChar pat) {
-  const TextChar* textend = text + n;
-  const TextChar* t = text;
-
-  switch ((textend - t) & 7) {
-    case 0:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 7:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 6:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 5:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 4:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 3:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 2:
-      if (*t++ == pat) return t - 1;
-      [[fallthrough]];
-    case 1:
-      if (*t++ == pat) return t - 1;
-  }
-  while (textend != t) {
-    if (t[0] == pat) return t;
-    if (t[1] == pat) return t + 1;
-    if (t[2] == pat) return t + 2;
-    if (t[3] == pat) return t + 3;
-    if (t[4] == pat) return t + 4;
-    if (t[5] == pat) return t + 5;
-    if (t[6] == pat) return t + 6;
-    if (t[7] == pat) return t + 7;
-    t += 8;
-  }
-  return nullptr;
-}
-
-static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
-                                        const char pat) {
-  return reinterpret_cast<const char*>(memchr(text, pat, n));
-}
-
 template <class InnerMatch, typename TextChar, typename PatChar>
 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
                   uint32_t patlen) {
-  MOZ_ASSERT(patlen > 0);
-
-  if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
-    return -1;
-  }
+  MOZ_ASSERT(patlen > 1);

  const typename InnerMatch::Extent extent =
      InnerMatch::computeExtent(pat, patlen);

  uint32_t i = 0;
  uint32_t n = textlen - patlen + 1;
+
  while (i < n) {
    const TextChar* pos;

+    // This is a bit awkward. Consider the case where we're searching "abcdef"
+    // for "def". n will be 4, because we know in advance that the last place we
+    // can *start* a successful search will be at 'd'. However, if we just use n
+    // - i, then our first search will be looking through "abcd" for "de",
+    // because our memchr2xN functions search for two characters at a time. So
+    // we just have to compensate by adding 1. This will never exceed textlen
+    // because we know patlen is at least two.
+    size_t searchLen = n - i + 1;
    if (sizeof(TextChar) == 1) {
      MOZ_ASSERT(pat[0] <= 0xff);
-      pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
+      pos = (TextChar*)SIMD::memchr2x8((char*)text + i, pat[0], pat[1],
+                                       searchLen);
    } else {
-      pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
+      pos = (TextChar*)SIMD::memchr2x16((char16_t*)(text + i), char16_t(pat[0]),
+                                        char16_t(pat[1]), searchLen);
    }

    if (pos == nullptr) {
@ -1824,7 +1783,9 @@ static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
    }

    i = static_cast<uint32_t>(pos - text);
-    if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
+    const uint32_t inlineLookaheadChars = 2;
+    if (InnerMatch::match(pat + inlineLookaheadChars,
+                          text + i + inlineLookaheadChars, extent)) {
      return i;
    }

@ -1843,22 +1804,26 @@ static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
    return -1;
  }

-#if defined(__i386__) || defined(_M_IX86) || defined(__i386)
-  /*
-   * Given enough registers, the unrolled loop below is faster than the
-   * following loop. 32-bit x86 does not have enough registers.
-   */
-  if (patLen == 1) {
-    const PatChar p0 = *pat;
-    const TextChar* end = text + textLen;
-    for (const TextChar* c = text; c != end; ++c) {
-      if (*c == p0) {
-        return c - text;
-      }
-    }
+  if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
    return -1;
  }
-#endif
+
+  if (patLen == 1) {
+    const TextChar* pos;
+    if (sizeof(TextChar) == 1) {
+      MOZ_ASSERT(pat[0] <= 0xff);
+      pos = (TextChar*)SIMD::memchr8((char*)text, pat[0], textLen);
+    } else {
+      pos =
+          (TextChar*)SIMD::memchr16((char16_t*)text, char16_t(pat[0]), textLen);
+    }
+
+    if (pos == nullptr) {
+      return -1;
+    }
+
+    return pos - text;
+  }

  /*
   * If the text or pattern string is short, BMH will be more expensive than