зеркало из https://github.com/github/ruby.git
* string.c (hash): replaced by MurmurHash described in
<http://murmurhash.googlepages.com/>. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15743 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
923a661a7a
Коммит
3e51715596
|
@ -1,3 +1,8 @@
|
|||
Tue Mar 11 10:19:10 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* string.c (hash): replaced by MurmurHash described in
|
||||
<http://murmurhash.googlepages.com/>.
|
||||
|
||||
Tue Mar 11 09:52:49 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* string.c (rb_str_comparable): empty strings in any encoding are
|
||||
|
|
141
string.c
141
string.c
|
@ -1685,122 +1685,41 @@ rb_str_concat(VALUE str1, VALUE str2)
|
|||
return rb_str_append(str1, str2);
|
||||
}
|
||||
|
||||
typedef unsigned int ub4; /* unsigned 4-byte quantities */
|
||||
typedef unsigned char ub1; /* unsigned 1-byte quantities */
|
||||
|
||||
#define hashsize(n) ((ub4)1<<(n))
|
||||
#define hashmask(n) (hashsize(n)-1)
|
||||
|
||||
/*
|
||||
--------------------------------------------------------------------
|
||||
mix -- mix 3 32-bit values reversibly.
|
||||
For every delta with one or two bits set, and the deltas of all three
|
||||
high bits or all three low bits, whether the original value of a,b,c
|
||||
is almost all zero or is uniformly distributed,
|
||||
* If mix() is run forward or backward, at least 32 bits in a,b,c
|
||||
have at least 1/4 probability of changing.
|
||||
* If mix() is run forward, every bit of c will change between 1/3 and
|
||||
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
||||
mix() was built out of 36 single-cycle latency instructions in a
|
||||
structure that could supported 2x parallelism, like so:
|
||||
a -= b;
|
||||
a -= c; x = (c>>13);
|
||||
b -= c; a ^= x;
|
||||
b -= a; x = (a<<8);
|
||||
c -= a; b ^= x;
|
||||
c -= b; x = (b>>13);
|
||||
...
|
||||
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
||||
of that parallelism. They've also turned some of those single-cycle
|
||||
latency instructions into multi-cycle latency instructions. Still,
|
||||
this is the fastest good hash I could find. There were about 2^^68
|
||||
to choose from. I only looked at a billion or so.
|
||||
--------------------------------------------------------------------
|
||||
*/
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
a -= b; a -= c; a ^= (c>>13); \
|
||||
b -= c; b -= a; b ^= (a<<8); \
|
||||
c -= a; c -= b; c ^= (b>>13); \
|
||||
a -= b; a -= c; a ^= (c>>12); \
|
||||
b -= c; b -= a; b ^= (a<<16); \
|
||||
c -= a; c -= b; c ^= (b>>5); \
|
||||
a -= b; a -= c; a ^= (c>>3); \
|
||||
b -= c; b -= a; b ^= (a<<10); \
|
||||
c -= a; c -= b; c ^= (b>>15); \
|
||||
}
|
||||
|
||||
/*
|
||||
--------------------------------------------------------------------
|
||||
hash() -- hash a variable-length key into a 32-bit value
|
||||
k : the key (the unaligned variable-length array of bytes)
|
||||
len : the length of the key, counting by bytes
|
||||
initval : can be any 4-byte value
|
||||
Returns a 32-bit value. Every bit of the key affects every bit of
|
||||
the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
||||
About 6*len+35 instructions.
|
||||
|
||||
The best hash table sizes are powers of 2. There is no need to do
|
||||
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
||||
use a bitmask. For example, if you need only 10 bits, do
|
||||
h = (h & hashmask(10));
|
||||
In which case, the hash table should have hashsize(10) elements.
|
||||
|
||||
If you are hashing n strings (ub1 **)k, do it like this:
|
||||
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
||||
|
||||
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
||||
code any way you wish, private, educational, or commercial. It's free.
|
||||
|
||||
See http://burtleburtle.net/bob/hash/evahash.html
|
||||
Use for hash table lookup, or anything where one collision in 2^^32 is
|
||||
acceptable. Do NOT use for cryptographic purposes.
|
||||
--------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static ub4
|
||||
hash(const ub1 *k, ub4 length, ub4 initval)
|
||||
/* k: the key */
|
||||
/* length: the length of the key */
|
||||
/* initval: the previous hash, or an arbitrary value */
|
||||
/* MurmurHash described in http://murmurhash.googlepages.com/ */
|
||||
unsigned int
|
||||
hash(const unsigned char * data, int len, unsigned int h)
|
||||
{
|
||||
register ub4 a,b,c,len;
|
||||
const unsigned int m = 0x7fd652ad;
|
||||
const int r = 16;
|
||||
|
||||
/* Set up the internal state */
|
||||
len = length;
|
||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
c = initval; /* the previous hash value */
|
||||
h += 0xdeadbeef;
|
||||
|
||||
/*---------------------------------------- handle most of the key */
|
||||
while (len >= 12) {
|
||||
a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
|
||||
b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
|
||||
c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
|
||||
mix(a,b,c);
|
||||
k += 12; len -= 12;
|
||||
while(len >= 4) {
|
||||
h += *(unsigned int *)data;
|
||||
h *= m;
|
||||
h ^= h >> r;
|
||||
|
||||
data += 4;
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
/*------------------------------------- handle the last 11 bytes */
|
||||
c += length;
|
||||
switch(len) /* all the case statements fall through */
|
||||
{
|
||||
case 11: c+=((ub4)k[10]<<24);
|
||||
case 10: c+=((ub4)k[9]<<16);
|
||||
case 9 : c+=((ub4)k[8]<<8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8 : b+=((ub4)k[7]<<24);
|
||||
case 7 : b+=((ub4)k[6]<<16);
|
||||
case 6 : b+=((ub4)k[5]<<8);
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((ub4)k[3]<<24);
|
||||
case 3 : a+=((ub4)k[2]<<16);
|
||||
case 2 : a+=((ub4)k[1]<<8);
|
||||
case 1 : a+=k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a,b,c);
|
||||
/*-------------------------------------------- report the result */
|
||||
return c;
|
||||
switch(len) {
|
||||
case 3:
|
||||
h += data[2] << 16;
|
||||
case 2:
|
||||
h += data[1] << 8;
|
||||
case 1:
|
||||
h += data[0];
|
||||
h *= m;
|
||||
h ^= h >> r;
|
||||
};
|
||||
|
||||
h *= m;
|
||||
h ^= h >> 10;
|
||||
h *= m;
|
||||
h ^= h >> 17;
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
Загрузка…
Ссылка в новой задаче