* string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount

only if it can use the SSE 4.2 POPCNT instruction, whose latency is 3 cycles.

* internal.h (rb_popcount64): use __builtin_popcountll because it is
  now in the fast path.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54894 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2016-05-03 13:14:30 +00:00
Родитель e0611820df
Коммит cdef0bc833
4 изменённых файлов: 35 добавлений и 4 удалений

Просмотреть файл

@ -1,3 +1,11 @@
Tue May 3 22:10:09 2016 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount
only if it can use the SSE 4.2 POPCNT instruction, whose latency is 3 cycles.
* internal.h (rb_popcount64): use __builtin_popcountll because it is
now in the fast path.
Tue May 3 14:19:18 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (new_if_gen): set newline flag to NODE_IF to trace all

Просмотреть файл

@ -2429,6 +2429,8 @@ fi])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap16, [__builtin_bswap16(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap32, [__builtin_bswap32(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_popcount, [__builtin_popcount(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_popcountll, [__builtin_popcountll(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)])
RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)])

Просмотреть файл

@ -271,21 +271,38 @@ nlz_intptr(uintptr_t x) {
/*
 * Return the number of 1 bits in the 32-bit value x.
 *
 * When HAVE_BUILTIN___BUILTIN_POPCOUNT is defined the work is handed to the
 * compiler builtin.  Otherwise the bits are summed in parallel: neighbouring
 * fields are added together and the field width doubles at every step
 * (1 -> 2 -> 4 -> 8 -> 16 -> 32 bits).
 */
static inline int
rb_popcount32(uint32_t x) {
#ifndef HAVE_BUILTIN___BUILTIN_POPCOUNT
    x = ((x >> 1) & 0x55555555) + (x & 0x55555555);   /* 2-bit sums:  0..2  */
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);   /* 4-bit sums:  0..4  */
    x = ((x >> 4) & 0x0f0f0f0f) + (x & 0x0f0f0f0f);   /* 8-bit sums:  0..8  */
    x = ((x >> 8) & 0x001f001f) + (x & 0x001f001f);   /* 16-bit sums: 0..16 */
    /* Fold the two 16-bit halves into the final count (0..32). */
    return ((x >> 16) & 0x0000003f) + (x & 0x0000003f);
#else
    return __builtin_popcount(x);
#endif
}
/*
 * Return the number of 1 bits in the 64-bit value x.
 *
 * The builtin used here is __builtin_popcountll, so the guard tests the
 * feature macro for that builtin rather than the one for the plain
 * __builtin_popcount; if it is not defined the portable path below is used.
 *
 * The portable path sums neighbouring bit fields in parallel, doubling the
 * field width at every step.
 */
static inline int
rb_popcount64(uint64_t x) {
#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNTLL
    return __builtin_popcountll(x);
#else
    x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
    x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
    /* Each nibble now holds at most 4, so a 3-bit mask (0x07) is enough. */
    x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
    x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
    x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
    /* Fold the two 32-bit halves into the final count (0..64). */
    return (int)((x & 0x7f) + (x >>32 & 0x7f));
#endif
}
/*
 * Return the number of 1 bits in a pointer-sized unsigned integer.
 *
 * Dispatches on SIZEOF_VOIDP to the 64-bit or 32-bit implementation.  If
 * SIZEOF_VOIDP is neither 8 nor 4 (or is not defined), a width-independent
 * loop is used so that the function always returns a value instead of
 * falling off its end.
 */
static inline int
rb_popcount_intptr(uintptr_t x) {
#if SIZEOF_VOIDP == 8
    return rb_popcount64(x);
#elif SIZEOF_VOIDP == 4
    return rb_popcount32(x);
#else
    int n = 0;
    /* x &= x - 1 clears the lowest set bit; one iteration per set bit. */
    while (x) {
        x &= x - 1;
        n++;
    }
    return n;
#endif
}
static inline int

Просмотреть файл

@ -1476,17 +1476,21 @@ count_utf8_lead_bytes_with_word(const uintptr_t *s)
uintptr_t d = *s;
/* Transform so that bit0 indicates whether we have a UTF-8 leading byte or not. */
d |= ~(d>>1);
d >>= 6;
d = (d>>6) | (~d>>7);
d &= NONASCII_MASK >> 7;
/* Gather all bytes. */
#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
/* use only if it can use POPCNT */
return rb_popcount_intptr(d);
#else
d += (d>>8);
d += (d>>16);
#if SIZEOF_VOIDP == 8
# if SIZEOF_VOIDP == 8
d += (d>>32);
#endif
# endif
return (d&0xF);
#endif
}
#endif