зеркало из https://github.com/github/ruby.git
refactor newhash (revision 58463 another try) [fix GH-1600]
* st.c (rb_hash_bulk_insert): new API to bulk insert entries into a hash. Given arguments are first inserted into the table at once, then reindexed. This is faster than inserting things using rb_hash_aset() one by one. This arrangement (rb_ prefixed function placed in st.c) is unavoidable because it both touches table internal and write barrier at once. * internal.h: delcare the new function. * hash.c (rb_hash_s_create): use the new function. * vm.c (core_hash_merge): ditto. * insns.def (newhash): ditto. * test/ruby/test_hash.rb: more coverage on hash creation. * test/ruby/test_literal.rb: ditto. ----------------------------------------------------------- benchmark results: minimum results in each 7 measurements. Execution time (sec) name before after loop_whileloop2 0.136 0.137 vm2_bighash* 1.249 0.623 Speedup ratio: compare with the result of `before' (greater is better) name after loop_whileloop2 0.996 vm2_bighash* 2.004 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58492 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
42c2df9937
Коммит
29ca20de2d
8
hash.c
8
hash.c
|
@ -650,7 +650,6 @@ static VALUE
|
|||
rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
|
||||
{
|
||||
VALUE hash, tmp;
|
||||
int i;
|
||||
|
||||
if (argc == 1) {
|
||||
tmp = rb_hash_s_try_convert(Qnil, argv[0]);
|
||||
|
@ -704,12 +703,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
|
|||
}
|
||||
|
||||
hash = hash_alloc(klass);
|
||||
if (argc > 0) {
|
||||
RHASH(hash)->ntbl = st_init_table_with_size(&objhash, argc / 2);
|
||||
}
|
||||
for (i=0; i<argc; i+=2) {
|
||||
rb_hash_aset(hash, argv[i], argv[i + 1]);
|
||||
}
|
||||
rb_hash_bulk_insert(argc, argv, hash);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
|
|
@ -492,16 +492,12 @@ newhash
|
|||
(...)
|
||||
(VALUE val) // inc += 1 - num;
|
||||
{
|
||||
rb_num_t i;
|
||||
|
||||
RUBY_DTRACE_CREATE_HOOK(HASH, num);
|
||||
|
||||
val = rb_hash_new();
|
||||
|
||||
for (i = num; i > 0; i -= 2) {
|
||||
const VALUE v = TOPN(i - 2);
|
||||
const VALUE k = TOPN(i - 1);
|
||||
rb_hash_aset(val, k, v);
|
||||
if (num) {
|
||||
rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
|
||||
}
|
||||
POPN(num);
|
||||
}
|
||||
|
|
|
@ -1550,6 +1550,9 @@ extern int ruby_enable_coredump;
|
|||
int rb_get_next_signal(void);
|
||||
int rb_sigaltstack_size(void);
|
||||
|
||||
/* st.c */
|
||||
extern void rb_hash_bulk_insert(long, const VALUE *, VALUE);
|
||||
|
||||
/* strftime.c */
|
||||
#ifdef RUBY_ENCODING_H
|
||||
VALUE rb_strftime_timespec(const char *format, size_t format_len, rb_encoding *enc,
|
||||
|
|
165
st.c
165
st.c
|
@ -1955,3 +1955,168 @@ st_numhash(st_data_t n)
|
|||
enum {s1 = 11, s2 = 3};
|
||||
return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
|
||||
}
|
||||
|
||||
/* Expand TAB to be suitable for holding SIZ entries in total.
|
||||
Pre-existing entries remain not deleted inside of TAB, but its bins
|
||||
are cleared to expect future reconstruction. See rehash below. */
|
||||
static void
|
||||
st_expand_table(st_table *tab, st_index_t siz)
|
||||
{
|
||||
st_table *tmp;
|
||||
st_index_t n;
|
||||
|
||||
if (siz <= get_allocated_entries(tab))
|
||||
return; /* enough room already */
|
||||
|
||||
tmp = st_init_table_with_size(tab->type, siz);
|
||||
n = get_allocated_entries(tab);
|
||||
MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
|
||||
free(tab->entries);
|
||||
if (tab->bins != NULL)
|
||||
free(tab->bins);
|
||||
if (tmp->bins != NULL)
|
||||
free(tmp->bins);
|
||||
tab->entry_power = tmp->entry_power;
|
||||
tab->bin_power = tmp->bin_power;
|
||||
tab->size_ind = tmp->size_ind;
|
||||
tab->entries = tmp->entries;
|
||||
tab->bins = NULL;
|
||||
tab->rebuilds_num++;
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
/* Rehash using linear search. */
|
||||
static void
|
||||
st_rehash_linear(st_table *tab)
|
||||
{
|
||||
st_index_t i, j;
|
||||
st_table_entry *p, *q;
|
||||
if (tab->bins) {
|
||||
free(tab->bins);
|
||||
tab->bins = NULL;
|
||||
}
|
||||
for (i = tab->entries_start; i < tab->entries_bound; i++) {
|
||||
p = &tab->entries[i];
|
||||
if (DELETED_ENTRY_P(p))
|
||||
continue;
|
||||
for (j = i + 1; j < tab->entries_bound; j++) {
|
||||
q = &tab->entries[j];
|
||||
if (DELETED_ENTRY_P(q))
|
||||
continue;
|
||||
if (PTR_EQUAL(tab, p, q->hash, q->key)) {
|
||||
st_assert(p < q);
|
||||
*p = *q;
|
||||
MARK_ENTRY_DELETED(q);
|
||||
tab->num_entries--;
|
||||
update_range_for_deleted(tab, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Rehash using index */
|
||||
static void
|
||||
st_rehash_indexed(st_table *tab)
|
||||
{
|
||||
st_index_t i;
|
||||
st_index_t const n = bins_size(tab);
|
||||
unsigned int const size_ind = get_size_ind(tab);
|
||||
st_index_t *bins = realloc(tab->bins, n);
|
||||
st_assert(bins != NULL);
|
||||
tab->bins = bins;
|
||||
initialize_bins(tab);
|
||||
for (i = tab->entries_start; i < tab->entries_bound; i++) {
|
||||
st_table_entry *p = &tab->entries[i];
|
||||
st_index_t ind;
|
||||
#ifdef QUADRATIC_PROBE
|
||||
st_index_t d = 1;
|
||||
#else
|
||||
st_index_t peterb = p->hash;
|
||||
#endif
|
||||
|
||||
if (DELETED_ENTRY_P(p))
|
||||
continue;
|
||||
|
||||
ind = hash_bin(p->hash, tab);
|
||||
for(;;) {
|
||||
st_index_t bin = get_bin(bins, size_ind, ind);
|
||||
st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
|
||||
if (EMPTY_OR_DELETED_BIN_P(bin)) {
|
||||
/* ok, new room */
|
||||
set_bin(bins, size_ind, ind, i + ENTRY_BASE);
|
||||
break;
|
||||
}
|
||||
else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
|
||||
/* duplicated key; delete it */
|
||||
st_assert(q < p);
|
||||
q->record = p->record;
|
||||
MARK_ENTRY_DELETED(p);
|
||||
tab->num_entries--;
|
||||
update_range_for_deleted(tab, bin);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
/* hash collision; skip it */
|
||||
#ifdef QUADRATIC_PROBE
|
||||
ind = hash_bin(ind + d, tab);
|
||||
d++;
|
||||
#else
|
||||
ind = secondary_hash(ind, tab, &peterb);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Reconstruct TAB's bins according to TAB's entries. This function
|
||||
permits conflicting keys inside of entries. No errors are reported
|
||||
then. All but one of them are discarded silently. */
|
||||
static void
|
||||
st_rehash(st_table *tab)
|
||||
{
|
||||
if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||||
st_rehash_linear(tab);
|
||||
else
|
||||
st_rehash_indexed(tab);
|
||||
}
|
||||
|
||||
#ifdef RUBY
|
||||
/* Mimics ruby's { foo => bar } syntax. This function is placed here
|
||||
because it touches table internals and write barriers at once. */
|
||||
void
|
||||
rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
|
||||
{
|
||||
int i;
|
||||
st_table *tab;
|
||||
|
||||
st_assert(argc % 2);
|
||||
if (! argc)
|
||||
return;
|
||||
if (! RHASH(hash)->ntbl)
|
||||
rb_hash_tbl_raw(hash);
|
||||
tab = RHASH(hash)->ntbl;
|
||||
|
||||
/* make room */
|
||||
st_expand_table(tab, tab->num_entries + argc);
|
||||
|
||||
/* push elems */
|
||||
for (i = 0; i < argc; /* */) {
|
||||
VALUE key = argv[i++];
|
||||
VALUE val = argv[i++];
|
||||
st_data_t k = (rb_obj_class(key) == rb_cString) ?
|
||||
rb_str_new_frozen(key) : key;
|
||||
st_table_entry e;
|
||||
e.hash = do_hash(k, tab);
|
||||
e.key = k;
|
||||
e.record = val;
|
||||
|
||||
tab->entries[tab->entries_bound++] = e;
|
||||
tab->num_entries++;
|
||||
RB_OBJ_WRITTEN(hash, Qundef, k);
|
||||
RB_OBJ_WRITTEN(hash, Qundef, val);
|
||||
}
|
||||
|
||||
/* reindex */
|
||||
st_rehash(tab);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -133,6 +133,50 @@ class TestHash < Test::Unit::TestCase
|
|||
assert_equal(100, h['a'])
|
||||
assert_equal(200, h['b'])
|
||||
assert_nil(h['c'])
|
||||
|
||||
h = @cls["a", 100, "b", 200]
|
||||
assert_equal(100, h['a'])
|
||||
assert_equal(200, h['b'])
|
||||
assert_nil(h['c'])
|
||||
|
||||
h = @cls[[["a", 100], ["b", 200]]]
|
||||
assert_equal(100, h['a'])
|
||||
assert_equal(200, h['b'])
|
||||
assert_nil(h['c'])
|
||||
|
||||
h = @cls[[["a", 100], ["b"], ["c", 300]]]
|
||||
assert_equal(100, h['a'])
|
||||
assert_equal(nil, h['b'])
|
||||
assert_equal(300, h['c'])
|
||||
|
||||
h = @cls[[["a", 100], "b", ["c", 300]]]
|
||||
assert_equal(100, h['a'])
|
||||
assert_equal(nil, h['b'])
|
||||
assert_equal(300, h['c'])
|
||||
end
|
||||
|
||||
def test_s_AREF_duplicated_key
|
||||
alist = [["a", 100], ["b", 200], ["a", 300], ["a", 400]]
|
||||
h = @cls[alist]
|
||||
assert_equal(2, h.size)
|
||||
assert_equal(400, h['a'])
|
||||
assert_equal(200, h['b'])
|
||||
assert_nil(h['c'])
|
||||
assert_equal(nil, h.key('300'))
|
||||
end
|
||||
|
||||
def test_s_AREF_frozen_key_id
|
||||
key = "a".freeze
|
||||
h = @cls[key, 100]
|
||||
assert_equal(100, h['a'])
|
||||
assert_same(key, *h.keys)
|
||||
end
|
||||
|
||||
def test_s_AREF_key_tampering
|
||||
key = "a".dup
|
||||
h = @cls[key, 100]
|
||||
key.upcase!
|
||||
assert_equal(100, h['a'])
|
||||
end
|
||||
|
||||
def test_s_new
|
||||
|
@ -145,7 +189,6 @@ class TestHash < Test::Unit::TestCase
|
|||
assert_instance_of(@cls, h)
|
||||
assert_equal('default', h.default)
|
||||
assert_equal('default', h['spurious'])
|
||||
|
||||
end
|
||||
|
||||
def test_try_convert
|
||||
|
|
|
@ -394,6 +394,35 @@ class TestRubyLiteral < Test::Unit::TestCase
|
|||
end;
|
||||
end
|
||||
|
||||
def test_hash_duplicated_key
|
||||
h = EnvUtil.suppress_warning do
|
||||
eval <<~end
|
||||
# This is a syntax that renders warning at very early stage.
|
||||
# eval used to delay warning, to be suppressible by EnvUtil.
|
||||
{"a" => 100, "b" => 200, "a" => 300, "a" => 400}
|
||||
end
|
||||
end
|
||||
assert_equal(2, h.size)
|
||||
assert_equal(400, h['a'])
|
||||
assert_equal(200, h['b'])
|
||||
assert_nil(h['c'])
|
||||
assert_equal(nil, h.key('300'))
|
||||
end
|
||||
|
||||
def test_hash_frozen_key_id
|
||||
key = "a".freeze
|
||||
h = {key => 100}
|
||||
assert_equal(100, h['a'])
|
||||
assert_same(key, *h.keys)
|
||||
end
|
||||
|
||||
def test_hash_key_tampering
|
||||
key = "a"
|
||||
h = {key => 100}
|
||||
key.upcase!
|
||||
assert_equal(100, h['a'])
|
||||
end
|
||||
|
||||
def test_range
|
||||
assert_instance_of Range, (1..2)
|
||||
assert_equal(1..2, 1..2)
|
||||
|
|
6
vm.c
6
vm.c
|
@ -2653,13 +2653,9 @@ static VALUE core_hash_merge_kwd(int argc, VALUE *argv);
|
|||
static VALUE
|
||||
core_hash_merge(VALUE hash, long argc, const VALUE *argv)
|
||||
{
|
||||
long i;
|
||||
|
||||
Check_Type(hash, T_HASH);
|
||||
VM_ASSERT(argc % 2 == 0);
|
||||
for (i=0; i<argc; i+=2) {
|
||||
rb_hash_aset(hash, argv[i], argv[i+1]);
|
||||
}
|
||||
rb_hash_bulk_insert(argc, argv, hash);
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче