Stop making a redundant hash copy in Hash#dup (#2489)

* Stop making a redundant hash copy in Hash#dup

It was making a copy of the hash without rehashing, then created an
extra copy of the hash to do the rehashing.  Since rehashing creates
a new copy already, this change just uses that rehashing to make
the copy.

[Bug #16121]

* Remove redundant Check_Type after to_hash

* Fix freeing and clearing destination hash in Hash#initialize_copy

The code was assuming the state of the destination hash based on the
source hash for clearing any existing table on it. If these don't match,
then that can cause the old table to be leaked. This can be seen by
compiling hash.c with `#define HASH_DEBUG 1` and running the following
script, which will crash from a debug assertion.

```ruby
h = 9.times.map { |i| [i, i] }.to_h
h.send(:initialize_copy, {})
```

* Remove dead code paths in rb_hash_initialize_copy

Given that `RHASH_ST_TABLE_P(h)` is defined as `(!RHASH_AR_TABLE_P(h))`
it shouldn't be possible for a hash to be neither of these, so there
is no need for the removed `else if` blocks.

* Share implementation between Hash#replace and Hash#initialize_copy

This also fixes key rehashing for small hashes backed by an array
table for Hash#replace.  This used to be done consistently in ruby
2.5.x, but stopped being done for small arrays in ruby 2.6.x.

This also bring optimization improvements that were done for
Hash#initialize_copy to Hash#replace.

* Add the Hash#dup benchmark
This commit is contained in:
Dylan Thacker-Smith 2019-10-21 04:29:21 -04:00 коммит произвёл Koichi Sasada
Родитель 8b8b9c1a1f
Коммит b970259044
2 изменённых файлов: 29 добавлений и 55 удалений

8
benchmark/hash_dup.yml Normal file
Просмотреть файл

@ -0,0 +1,8 @@
prelude: |
small_hash = { a: 1 }
larger_hash = 20.times.map { |i| [('a'.ord + i).chr.to_sym, i] }.to_h
benchmark:
dup_small: small_hash.dup
dup_larger: larger_hash.dup
loop_count: 10000

76
hash.c
Просмотреть файл

@ -2808,49 +2808,6 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
return val;
}
static int
replace_i(VALUE key, VALUE val, VALUE hash)
{
rb_hash_aset(hash, key, val);
return ST_CONTINUE;
}
/* :nodoc: */
static VALUE
rb_hash_initialize_copy(VALUE hash, VALUE hash2)
{
rb_hash_modify_check(hash);
hash2 = to_hash(hash2);
Check_Type(hash2, T_HASH);
if (hash == hash2) return hash;
if (RHASH_AR_TABLE_P(hash2)) {
if (RHASH_AR_TABLE_P(hash)) ar_free_and_clear_table(hash);
ar_copy(hash, hash2);
if (RHASH_AR_TABLE_SIZE(hash))
rb_hash_rehash(hash);
}
else if (RHASH_ST_TABLE_P(hash2)) {
if (RHASH_ST_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2)));
if (RHASH_ST_TABLE(hash)->num_entries)
rb_hash_rehash(hash);
}
else if (RHASH_AR_TABLE_P(hash)) {
ar_clear(hash);
}
else if (RHASH_ST_TABLE_P(hash)) {
st_clear(RHASH_ST_TABLE(hash));
}
COPY_DEFAULT(hash, hash2);
return hash;
}
/*
* call-seq:
* hsh.replace(other_hash) -> hsh
@ -2868,26 +2825,35 @@ rb_hash_replace(VALUE hash, VALUE hash2)
{
rb_hash_modify_check(hash);
if (hash == hash2) return hash;
if (RHASH_ITER_LEV(hash) > 0) {
rb_raise(rb_eRuntimeError, "can't replace hash during iteration");
}
hash2 = to_hash(hash2);
COPY_DEFAULT(hash, hash2);
rb_hash_clear(hash);
if (RHASH_AR_TABLE_P(hash)) {
if (RHASH_AR_TABLE_P(hash2)) {
ar_copy(hash, hash2);
}
else {
goto st_to_st;
}
if (RHASH_AR_TABLE_P(hash2)) {
ar_clear(hash);
}
else {
ar_free_and_clear_table(hash);
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), RHASH_SIZE(hash2)));
}
}
else {
if (RHASH_AR_TABLE_P(hash2)) ar_force_convert_table(hash2, __FILE__, __LINE__);
st_to_st:
if (RHASH_AR_TABLE_P(hash2)) {
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_CLEAR(hash);
}
else {
st_clear(RHASH_ST_TABLE(hash));
RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type;
rb_hash_foreach(hash2, replace_i, hash);
}
}
rb_hash_foreach(hash2, rb_hash_rehash_i, (VALUE)hash);
rb_gc_writebarrier_remember(hash);
return hash;
}
@ -6143,7 +6109,7 @@ Init_Hash(void)
rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_initialize_copy, 1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);
rb_define_method(rb_cHash, "to_hash", rb_hash_to_hash, 0);