diff --git a/ChangeLog b/ChangeLog
index 267b3433c0..9312462438 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Tue May 3 22:10:09 2016 NARUSE, Yui
+
+	* string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount
+	  only if it can use SSE 4.2 POPCNT whose latency is 3 cycle.
+
+	* internal.h (rb_popcount64): use __builtin_popcountll because now
+	  it is in fast path.
+
 Tue May 3 14:19:18 2016 Nobuyoshi Nakada
 
 	* parse.y (new_if_gen): set newline flag to NODE_IF to trace all
diff --git a/configure.in b/configure.in
index 14711a6837..ad8cc988c4 100644
--- a/configure.in
+++ b/configure.in
@@ -2429,6 +2429,8 @@ fi])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap16, [__builtin_bswap16(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap32, [__builtin_bswap32(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_popcount, [__builtin_popcount(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_popcountll, [__builtin_popcountll(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)])
diff --git a/internal.h b/internal.h
index 0bac79e012..765758a962 100644
--- a/internal.h
+++ b/internal.h
@@ -271,21 +271,44 @@ nlz_intptr(uintptr_t x) {
 
+/* Number of 1 bits in x.  Uses __builtin_popcount when it is available;
+ * otherwise adds neighbouring bit fields pairwise, doubling the width. */
 static inline int
 rb_popcount32(uint32_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT
+    return __builtin_popcount(x);
+#else
     x = (x & 0x55555555) + (x >> 1 & 0x55555555);
     x = (x & 0x33333333) + (x >> 2 & 0x33333333);
     x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f);
     x = (x & 0x001f001f) + (x >> 8 & 0x001f001f);
     return (x & 0x0000003f) + (x >>16 & 0x0000003f);
+#endif
 }
 
+/* 64-bit variant.  Tested against the __builtin_popcountll macro because
+ * that is the builtin called here, not __builtin_popcount. */
 static inline int
 rb_popcount64(uint64_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNTLL
+    return __builtin_popcountll(x);
+#else
     x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
     x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
     x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
     x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
     x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
     return (x & 0x7f) + (x >>32 & 0x7f);
+#endif
+}
+
+/* Number of 1 bits in a pointer-sized word; picks the 64- or 32-bit
+ * routine according to SIZEOF_VOIDP. */
+static inline int
+rb_popcount_intptr(uintptr_t x) {
+#if SIZEOF_VOIDP == 8
+    return rb_popcount64(x);
+#elif SIZEOF_VOIDP == 4
+    return rb_popcount32(x);
+#endif
 }
 
 static inline int
diff --git a/string.c b/string.c
index 35e53f7306..9ccf0933e2 100644
--- a/string.c
+++ b/string.c
@@ -1476,17 +1476,21 @@ count_utf8_lead_bytes_with_word(const uintptr_t *s)
     uintptr_t d = *s;
 
     /* Transform so that bit0 indicates whether we have a UTF-8 leading byte or not. */
-    d |= ~(d>>1);
-    d >>= 6;
+    d = (d>>6) | (~d>>7);
     d &= NONASCII_MASK >> 7;
 
     /* Gather all bytes. */
+#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
+    /* use only if it can use POPCNT */
+    return rb_popcount_intptr(d);
+#else
     d += (d>>8);
     d += (d>>16);
-#if SIZEOF_VOIDP == 8
+# if SIZEOF_VOIDP == 8
     d += (d>>32);
-#endif
+# endif
     return (d&0xF);
+#endif
 }
 #endif
 