зеркало из https://github.com/mozilla/gecko-dev.git
Bug 585538 - Use SIMD UTF8 to UTF16 code on Linux 32-bit. r=khuey, a2.0=bsmedberg
--HG-- extra : rebase_source : 21031b1e8366c00a9c4745e69f206d2358a294e2
This commit is contained in:
Родитель
34c407b464
Коммит
29ff79fc83
|
@ -75,7 +75,22 @@ CPPSRCS += \
|
|||
nsUnicodeToCP1252.cpp \
|
||||
nsUnicodeToMacRoman.cpp \
|
||||
$(NULL)
|
||||
endif
|
||||
|
||||
# Are we targeting x86-32 or x86-64? If so, we want to build the SSE2
|
||||
# implementation in nsUTF8ToUnicodeSSE2.cpp.
|
||||
ifneq (,$(INTEL_ARCHITECTURE))
|
||||
CPPSRCS += nsUTF8ToUnicodeSSE2.cpp
|
||||
|
||||
# nsUTF8ToUnicodeSSE2.cpp uses SSE2 intrinsics, so we need to pass -msse2 if
|
||||
# we're using gcc. (See bug 585538 comment 12.)
|
||||
# The flag is added as a target-specific variable, so it applies to this one
# object file only.
ifdef GNU_CC
|
||||
nsUTF8ToUnicodeSSE2.$(OBJ_SUFFIX): CXXFLAGS+=-msse2
|
||||
endif
|
||||
|
||||
# With the Sun Studio C++ compiler the same object is built with
# -xarch=sse2 -xO4 instead, again only for this one object file.
ifdef SOLARIS_SUNPRO_CXX
|
||||
nsUTF8ToUnicodeSSE2.$(OBJ_SUFFIX): OS_CXXFLAGS += -xarch=sse2 -xO4
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(MOZ_WIDGET_TOOLKIT),os2)
|
||||
|
@ -96,12 +111,6 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
# When OS_TEST contains "86" and the Sun Studio C++ compiler is in use,
# compile nsUTF8ToUnicode with -xarch=sse2 -xO4. The flags are set as a
# target-specific OS_CXXFLAGS value, so only that object file is affected.
ifeq (86,$(findstring 86,$(OS_TEST)))
|
||||
ifdef SOLARIS_SUNPRO_CXX
|
||||
nsUTF8ToUnicode.$(OBJ_SUFFIX): OS_CXXFLAGS += -xarch=sse2 -xO4
|
||||
endif
|
||||
endif
|
||||
|
||||
EXTRA_DSO_LDOPTS = \
|
||||
../util/$(LIB_PREFIX)ucvutil_s.$(LIB_SUFFIX) \
|
||||
$(MOZ_UNICHARUTIL_LIBS) \
|
||||
|
|
|
@ -35,9 +35,6 @@
|
|||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
// So SSE.h will include emmintrin.h in an appropriate way:
|
||||
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
|
||||
|
||||
#include "nsUCSupport.h"
|
||||
#include "nsUTF8ToUnicode.h"
|
||||
#include "mozilla/SSE.h"
|
||||
|
@ -116,90 +113,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Reset()
|
|||
// number of bytes left in src and the number of unichars available in
|
||||
// dst.)
|
||||
|
||||
#ifdef MOZILLA_COMPILE_WITH_SSE2
|
||||
|
||||
// Converts the leading run of ASCII bytes (high bit clear) at |src| into
// PRUnichar values at |dst|, looking at no more than |len| bytes.
//
// |src| and |dst| are references and are advanced past everything that was
// converted; |len| is passed by value, so the caller's count is not updated.
// Conversion stops at the first byte that has bit 0x80 set.
static inline void
|
||||
Convert_ascii_run (const char *&src,
|
||||
PRUnichar *&dst,
|
||||
PRInt32 len)
|
||||
{
|
||||
// Take the vector path only when there are at least 16 bytes to look at and
// mozilla::use_sse2() returns true (presumably a run-time SSE2 check from
// mozilla/SSE.h, which is not shown here -- confirm).
if (len > 15 && mozilla::use_sse2()) {
|
||||
__m128i in, out1, out2;
|
||||
__m128d *outp1, *outp2;
|
||||
__m128i zeroes;
|
||||
PRUint32 offset;
|
||||
|
||||
// align input to 16 bytes
|
||||
// (bytes are converted one at a time until src is 16-byte aligned, because
// the vector loops below read src with the aligned load _mm_load_si128)
while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
|
||||
// A non-ASCII byte ends the run, so there is nothing left to convert.
if (*src & 0x80U)
|
||||
return;
|
||||
*dst++ = (PRUnichar) *src++;
|
||||
len--;
|
||||
}
|
||||
|
||||
zeroes = _mm_setzero_si128();
|
||||
|
||||
// The low four bits of dst's address choose the store strategy below.
offset = NS_PTR_TO_UINT32(dst) & 15;
|
||||
|
||||
// Note: all these inner loops have to break, not return; we need
|
||||
// to let the single-char loop below catch any leftover
|
||||
// byte-at-a-time ASCII chars, since this function must consume
|
||||
// all available ASCII chars before it returns
|
||||
|
||||
// Case 1: dst is 16-byte aligned, so the aligned non-temporal store
// _mm_stream_si128 can be used for both halves of the result.
if (offset == 0) {
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
// _mm_movemask_epi8 collects the top bit of each of the 16 bytes; a
// non-zero mask means this chunk contains at least one non-ASCII byte.
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
// Interleave with zero bytes to widen the low and high eight input bytes
// into eight 16-bit values each.
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
_mm_stream_si128((__m128i *) dst, out1);
|
||||
_mm_stream_si128((__m128i *) (dst + 8), out2);
|
||||
dst += 16;
|
||||
src += 16;
|
||||
len -= 16;
|
||||
}
|
||||
// Case 2: dst is 8 bytes past a 16-byte boundary, so each 128-bit result is
// written as two separate 64-bit stores.
} else if (offset == 8) {
|
||||
// View out1/out2 as __m128d so their upper halves can be handed to
// _mm_storeh_pd, which takes a __m128d argument.
outp1 = (__m128d *) &out1;
|
||||
outp2 = (__m128d *) &out2;
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
// Lower 64 bits of each result go to dst and dst + 8 ...
_mm_storel_epi64((__m128i *) dst, out1);
|
||||
_mm_storel_epi64((__m128i *) (dst + 8), out2);
|
||||
// ... and the upper 64 bits to dst + 4 and dst + 12 (these offsets are the
// second 8 bytes of each result if PRUnichar is 2 bytes wide; the type is
// declared elsewhere -- confirm).
_mm_storeh_pd((double *) (dst + 4), *outp1);
|
||||
_mm_storeh_pd((double *) (dst + 12), *outp2);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
len -= 16;
|
||||
}
|
||||
// Case 3: any other dst alignment; use unaligned 128-bit stores.
} else {
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
_mm_storeu_si128((__m128i *) dst, out1);
|
||||
_mm_storeu_si128((__m128i *) (dst + 8), out2);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finish off a byte at a time
|
||||
// (this also covers runs too short for the vector path and whatever is left
// after one of the vector loops above stopped)
|
||||
while (len-- > 0 && (*src & 0x80U) == 0) {
|
||||
*dst++ = (PRUnichar) *src++;
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(__arm__) || defined(_M_ARM)
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
|
||||
// on ARM, do extra work to avoid byte/halfword reads/writes by
|
||||
// reading/writing a word at a time for as long as we can
|
||||
|
@ -256,13 +170,30 @@ finish:
|
|||
}
|
||||
}
|
||||
|
||||
#else /* generic code */
|
||||
#else
|
||||
|
||||
// Forward declaration of the SSE2 implementation of Convert_ascii_run. Only
// the declaration lives here; the definition is in nsUTF8ToUnicodeSSE2.cpp,
// which the build compiles with SSE2-specific compiler flags.
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
namespace mozilla {
|
||||
namespace SSE2 {
|
||||
|
||||
void Convert_ascii_run(const char *&src, PRUnichar *&dst, PRInt32 len);
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
Convert_ascii_run (const char *&src,
|
||||
PRUnichar *&dst,
|
||||
PRInt32 len)
|
||||
{
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
mozilla::SSE2::Convert_ascii_run(src, dst, len);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (len-- > 0 && (*src & 0x80U) == 0) {
|
||||
*dst++ = (PRUnichar) *src++;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Foundation code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is the Mozilla Foundation.
|
||||
*
|
||||
* Portions created by the Initial Developer are Copyright (C) 2010
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
// This file should only be compiled if you're on x86 or x86_64. Additionally,
|
||||
// you'll need to compile this file with -msse2 if you're using gcc.
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "nscore.h"
|
||||
|
||||
namespace mozilla {
|
||||
namespace SSE2 {
|
||||
|
||||
// Converts the leading run of ASCII bytes (high bit clear) at |src| into
// PRUnichar values at |dst|, looking at no more than |len| bytes.
//
// |src| and |dst| are references and are advanced past everything that was
// converted; |len| is passed by value, so the caller's count is not updated.
// Conversion stops at the first byte that has bit 0x80 set.
//
// This function uses SSE2 intrinsics unconditionally; it performs no CPU
// feature check of its own.
void
|
||||
Convert_ascii_run(const char *&src,
|
||||
PRUnichar *&dst,
|
||||
PRInt32 len)
|
||||
{
|
||||
// The vector path needs at least 16 bytes to look at; shorter runs go
// straight to the byte-at-a-time loop at the bottom.
if (len > 15) {
|
||||
__m128i in, out1, out2;
|
||||
__m128d *outp1, *outp2;
|
||||
__m128i zeroes;
|
||||
PRUint32 offset;
|
||||
|
||||
// align input to 16 bytes
|
||||
// (bytes are converted one at a time until src is 16-byte aligned, because
// the vector loops below read src with the aligned load _mm_load_si128)
while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
|
||||
// A non-ASCII byte ends the run, so there is nothing left to convert.
if (*src & 0x80U)
|
||||
return;
|
||||
*dst++ = (PRUnichar) *src++;
|
||||
len--;
|
||||
}
|
||||
|
||||
zeroes = _mm_setzero_si128();
|
||||
|
||||
// The low four bits of dst's address choose the store strategy below.
offset = NS_PTR_TO_UINT32(dst) & 15;
|
||||
|
||||
// Note: all these inner loops have to break, not return; we need
|
||||
// to let the single-char loop below catch any leftover
|
||||
// byte-at-a-time ASCII chars, since this function must consume
|
||||
// all available ASCII chars before it returns
|
||||
|
||||
// Case 1: dst is 16-byte aligned, so the aligned non-temporal store
// _mm_stream_si128 can be used for both halves of the result.
if (offset == 0) {
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
// _mm_movemask_epi8 collects the top bit of each of the 16 bytes; a
// non-zero mask means this chunk contains at least one non-ASCII byte.
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
// Interleave with zero bytes to widen the low and high eight input bytes
// into eight 16-bit values each.
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
_mm_stream_si128((__m128i *) dst, out1);
|
||||
_mm_stream_si128((__m128i *) (dst + 8), out2);
|
||||
dst += 16;
|
||||
src += 16;
|
||||
len -= 16;
|
||||
}
|
||||
// Case 2: dst is 8 bytes past a 16-byte boundary, so each 128-bit result is
// written as two separate 64-bit stores.
} else if (offset == 8) {
|
||||
// View out1/out2 as __m128d so their upper halves can be handed to
// _mm_storeh_pd, which takes a __m128d argument.
outp1 = (__m128d *) &out1;
|
||||
outp2 = (__m128d *) &out2;
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
// Lower 64 bits of each result go to dst and dst + 8 ...
_mm_storel_epi64((__m128i *) dst, out1);
|
||||
_mm_storel_epi64((__m128i *) (dst + 8), out2);
|
||||
// ... and the upper 64 bits to dst + 4 and dst + 12 (these offsets are the
// second 8 bytes of each result if PRUnichar is 2 bytes wide; the type is
// declared elsewhere -- confirm).
_mm_storeh_pd((double *) (dst + 4), *outp1);
|
||||
_mm_storeh_pd((double *) (dst + 12), *outp2);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
len -= 16;
|
||||
}
|
||||
// Case 3: any other dst alignment; use unaligned 128-bit stores.
} else {
|
||||
while (len > 15) {
|
||||
in = _mm_load_si128((__m128i *) src);
|
||||
if (_mm_movemask_epi8(in))
|
||||
break;
|
||||
out1 = _mm_unpacklo_epi8(in, zeroes);
|
||||
out2 = _mm_unpackhi_epi8(in, zeroes);
|
||||
_mm_storeu_si128((__m128i *) dst, out1);
|
||||
_mm_storeu_si128((__m128i *) (dst + 8), out2);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finish off a byte at a time
|
||||
// (this also covers runs too short for the vector path and whatever is left
// after one of the vector loops above stopped)
|
||||
while (len-- > 0 && (*src & 0x80U) == 0) {
|
||||
*dst++ = (PRUnichar) *src++;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace SSE2
|
||||
} // namespace mozilla
|
Загрузка…
Ссылка в новой задаче