зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1811184 - Move FirstNon8Bit vector implementation to xsimd r=hsivonen
xsimd does not support ppc yet, so leave the VMX implementation untouched.

Differential Revision: https://phabricator.services.mozilla.com/D167225
This commit is contained in:
Родитель
160324ff1c
Коммит
6b485d174d
|
@ -580,6 +580,7 @@ LOCAL_INCLUDES += [
|
|||
"/netwerk/url-classifier",
|
||||
"/parser/htmlparser",
|
||||
"/security/manager/ssl",
|
||||
"/third_party/xsimd/include",
|
||||
"/widget",
|
||||
"/xpcom/ds",
|
||||
]
|
||||
|
|
|
@ -158,10 +158,8 @@ static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
|
|||
return -1;
|
||||
}
|
||||
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
namespace mozilla::SSE2 {
|
||||
int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
|
||||
} // namespace mozilla::SSE2
|
||||
#if defined(MOZILLA_MAY_SUPPORT_SSE2)
|
||||
# include "nsTextFragmentGeneric.h"
|
||||
#endif
|
||||
|
||||
#ifdef __powerpc__
|
||||
|
@ -182,7 +180,7 @@ int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
|
|||
static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
return mozilla::SSE2::FirstNon8Bit(str, end);
|
||||
return mozilla::FirstNon8Bit<xsimd::sse2>(str, end);
|
||||
}
|
||||
#elif defined(__powerpc__)
|
||||
if (mozilla::supports_vmx()) {
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
#include "nsTextFragmentImpl.h"
|
||||
#include <algorithm>
|
||||
#include <xsimd/xsimd.hpp>
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
template <class Arch>
|
||||
int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
|
||||
const uint32_t numUnicharsPerVector = xsimd::batch<int16_t, Arch>::size;
|
||||
using p = Non8BitParameters<sizeof(size_t)>;
|
||||
const size_t mask = p::mask();
|
||||
const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
|
||||
const int32_t len = end - str;
|
||||
int32_t i = 0;
|
||||
|
||||
// Align ourselves to the Arch boundary
|
||||
int32_t alignLen = std::min(
|
||||
len, int32_t(((-NS_PTR_TO_INT32(str)) & (Arch::alignment() - 1)) /
|
||||
sizeof(char16_t)));
|
||||
for (; i < alignLen; i++) {
|
||||
if (str[i] > 255) return i;
|
||||
}
|
||||
|
||||
// Check one batch at a time.
|
||||
const int32_t vectWalkEnd =
|
||||
((len - i) / numUnicharsPerVector) * numUnicharsPerVector;
|
||||
const uint16_t shortMask = 0xff00;
|
||||
xsimd::batch<int16_t, Arch> vectmask(static_cast<int16_t>(shortMask));
|
||||
for (; i < vectWalkEnd; i += numUnicharsPerVector) {
|
||||
const auto vect = xsimd::batch<int16_t, Arch>::load_aligned(str + i);
|
||||
if (xsimd::any((vect & vectmask) != 0)) return i;
|
||||
}
|
||||
|
||||
// Check one word at a time.
|
||||
const int32_t wordWalkEnd =
|
||||
((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
|
||||
for (; i < wordWalkEnd; i += numUnicharsPerWord) {
|
||||
const size_t word = *reinterpret_cast<const size_t*>(str + i);
|
||||
if (word & mask) return i;
|
||||
}
|
||||
|
||||
// Take care of the remainder one character at a time.
|
||||
for (; i < len; i++) {
|
||||
if (str[i] > 255) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
} // namespace mozilla
|
|
@ -1,65 +1,10 @@
|
|||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* this source code form is subject to the terms of the mozilla public
|
||||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// This file should only be compiled if you're on x86 or x86_64. Additionally,
|
||||
// you'll need to compile this file with -msse2 if you're using gcc.
|
||||
#include "nsTextFragmentGeneric.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "nscore.h"
|
||||
#include "nsTextFragmentImpl.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace mozilla::SSE2 {
|
||||
|
||||
// Returns true when every one of the 16 bytes in x is zero.
static inline bool is_zero(__m128i x) {
  // Each byte of zeroBytes is 0xFF where the matching byte of x equals zero.
  const __m128i zeroBytes = _mm_cmpeq_epi8(x, _mm_setzero_si128());
  // Collapse the top bit of each byte into a 16-bit mask; all ones means
  // all 16 bytes compared equal to zero.
  const int byteMask = _mm_movemask_epi8(zeroBytes);
  return byteMask == 0xffff;
}
|
||||
|
||||
int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
|
||||
const uint32_t numUnicharsPerVector = 8;
|
||||
using p = Non8BitParameters<sizeof(size_t)>;
|
||||
const size_t mask = p::mask();
|
||||
const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
|
||||
const int32_t len = end - str;
|
||||
int32_t i = 0;
|
||||
|
||||
// Align ourselves to a 16-byte boundary, as required by _mm_load_si128
|
||||
// (i.e. MOVDQA).
|
||||
int32_t alignLen = std::min(
|
||||
len, int32_t(((-NS_PTR_TO_INT32(str)) & 0xf) / sizeof(char16_t)));
|
||||
for (; i < alignLen; i++) {
|
||||
if (str[i] > 255) return i;
|
||||
}
|
||||
|
||||
// Check one XMM register (16 bytes) at a time.
|
||||
const int32_t vectWalkEnd =
|
||||
((len - i) / numUnicharsPerVector) * numUnicharsPerVector;
|
||||
const uint16_t shortMask = 0xff00;
|
||||
__m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(shortMask));
|
||||
for (; i < vectWalkEnd; i += numUnicharsPerVector) {
|
||||
const __m128i vect = *reinterpret_cast<const __m128i*>(str + i);
|
||||
if (!is_zero(_mm_and_si128(vect, vectmask))) return i;
|
||||
}
|
||||
|
||||
// Check one word at a time.
|
||||
const int32_t wordWalkEnd =
|
||||
((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
|
||||
for (; i < wordWalkEnd; i += numUnicharsPerWord) {
|
||||
const size_t word = *reinterpret_cast<const size_t*>(str + i);
|
||||
if (word & mask) return i;
|
||||
}
|
||||
|
||||
// Take care of the remainder one character at a time.
|
||||
for (; i < len; i++) {
|
||||
if (str[i] > 255) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
} // namespace mozilla::SSE2
|
||||
namespace mozilla {
|
||||
// Explicit instantiation of the FirstNon8Bit template for the xsimd::sse2
// architecture tag.
template int32_t FirstNon8Bit<xsimd::sse2>(const char16_t*, const char16_t*);
|
||||
} // namespace mozilla
|
||||
|
|
Загрузка…
Ссылка в новой задаче