st.c: retry operations if rebuilt

Calling the .eql? and .hash methods during a Hash operation can
result in a thread switch or a signal handler to run: allowing
one execution context to rebuild the hash table while another is
still reading or writing the table.  This results in a
use-after-free bug affecting the thread_safe-0.3.6 test suite
and likely other bugs.

This bug did not affect users of commonly keys (String, Symbol,
Fixnum) as those are optimized to avoid method dispatch
for .eql? and .hash methods.

A separate version of this change needs to be ported to Ruby 2.3.x
which had a different implementation of st.c but was affected
by the same bug.

* st.c: Add comment about table rebuilding during comparison.
  (DO_PTR_EQUAL_CHECK): New macro.
  (REBUILT_TABLE_ENTRY_IND, REBUILT_TABLE_BIN_IND): New macros.
  (find_entry, find_table_entry_ind, find_table_bin_ind): Use new
  macros.  Return the rebuild flag.
  (find_table_bin_ptr_and_reserve): Ditto.
  (st_lookup, st_get_key, st_insert, st_insert2): Retry the
  operation if the table was rebuilt.
  (st_general_delete, st_shift, st_update, st_general_foreach):
  Ditto.
  (st_rehash_linear, st_rehash_indexed): Use DO_PTR_EQUAL_CHECK.
  Return the rebuild flag.
  (st_rehash): Retry the operation if the table was rebuilt.
  [ruby-core:85510] [Ruby trunk Bug#14357]

Thanks to Vit Ondruch for reporting the bug.

From: Vladimir Makarov <vmakarov@redhat.com>

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62396 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
normal 2018-02-13 10:02:07 +00:00
Родитель 248ff4f1b1
Коммит 4663c224fa
1 изменённых файлов: 184 добавлений и 72 удалений

256
st.c
Просмотреть файл

@ -90,6 +90,11 @@
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
bins depending on the current hash table size.
o The implementation takes into account that the table can be
rebuilt during hashing or comparison functions. It can happen if
the functions are implemented in Ruby and a thread switch occurs
during their execution.
This implementation speeds up the Ruby hash table benchmarks in
average by more 40% on Intel Haswell CPU.
@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = {
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
/* As PRT_EQUAL only its result is returned in RES. REBUILT_P is set
up to TRUE if the table is rebuilt during the comparison. */
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
do { \
unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
res = PTR_EQUAL(tab, ptr, hash_val, key); \
rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
} while (FALSE)
/* Features of a table. */
struct st_features {
/* Power of 2 used for number of allocated entries. */
@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Entry and bin values returned when we found a table rebuild during
the search. */
#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
/* Mark I-th bin of table TAB as corresponding to a deleted table
entry. Update number of entries in the table and number of bins
corresponding to deleted entries. */
@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
search. Return the index of the found entry in array `entries`.
If it is not found, return UNDEFINED_ENTRY_IND. */
If it is not found, return UNDEFINED_ENTRY_IND. If the table was
rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
static inline st_index_t
find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
{
int eq_p, rebuilt_p;
st_index_t i, bound;
st_table_entry *entries;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
if (PTR_EQUAL(tab, &entries[i], hash_value, key))
DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return REBUILT_TABLE_ENTRY_IND;
if (eq_p)
return i;
}
return UNDEFINED_ENTRY_IND;
@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
/*#define QUADRATIC_PROBE*/
/* Return index of entry with HASH_VALUE and KEY in table TAB. If
there is no such entry, return UNDEFINED_ENTRY_IND. */
there is no such entry, return UNDEFINED_ENTRY_IND. If the table
was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
int eq_p, rebuilt_p;
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (! EMPTY_OR_DELETED_BIN_P(bin)
&& PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
break;
else if (EMPTY_BIN_P(bin))
if (! EMPTY_OR_DELETED_BIN_P(bin)) {
DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return REBUILT_TABLE_ENTRY_IND;
if (eq_p)
break;
} else if (EMPTY_BIN_P(bin))
return UNDEFINED_ENTRY_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
/* Find and return index of table TAB bin corresponding to an entry
with HASH_VALUE and KEY. If there is no such bin, return
UNDEFINED_BIN_IND. */
UNDEFINED_BIN_IND. If the table was rebuilt during the search,
return REBUILT_TABLE_BIN_IND. */
static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
int eq_p, rebuilt_p;
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (! EMPTY_OR_DELETED_BIN_P(bin)
&& PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
break;
else if (EMPTY_BIN_P(bin))
if (! EMPTY_OR_DELETED_BIN_P(bin)) {
DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return REBUILT_TABLE_BIN_IND;
if (eq_p)
break;
} else if (EMPTY_BIN_P(bin))
return UNDEFINED_BIN_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_OR_DELETED_BIN_P(bin))
return ind;
st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
st_assert (entries[bin - ENTRY_BASE].hash != hash_value);
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
d++;
@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bigger entries array. Although we can reuse a deleted bin, the
result bin value is always empty if the table has no entry with
KEY. Return the entries array index of the found entry or
UNDEFINED_ENTRY_IND if it is not found. */
UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
during the search, return REBUILT_TABLE_ENTRY_IND. */
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_data_t key, st_index_t *bin_ind)
{
int eq_p, rebuilt_p;
st_index_t ind;
st_hash_t curr_hash_value = *hash_value;
#ifdef QUADRATIC_PROBE
@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
break;
}
else if (! DELETED_BIN_P(entry_index)) {
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return REBUILT_TABLE_ENTRY_IND;
if (eq_p)
break;
}
else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@ -1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
st_index_t bin_ind;
int new_p;
rebuild_table_if_necessary(tab);
hash_value = do_hash(key, tab);
retry:
rebuild_table_if_necessary(tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
st_index_t bin_ind;
int new_p;
rebuild_table_if_necessary (tab);
hash_value = do_hash(key, tab);
retry:
rebuild_table_if_necessary (tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
check = tab->rebuilds_num;
key = (*func)(key);
st_assert(check == tab->rebuilds_num);
st_assert(do_hash(key, tab) == hash_value);
ind = tab->entries_bound++;
entry = &tab->entries[ind];
entry->hash = hash_value;
@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
entry->record = value;
if (bin_ind != UNDEFINED_BIN_IND)
set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
st_assert(do_hash(key, tab) == hash_value);
#ifdef ST_DEBUG
st_check(tab);
#endif
@ -1281,8 +1340,11 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
st_assert(tab != NULL);
hash = do_hash(*key, tab);
retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, *key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
if (bin == UNDEFINED_ENTRY_IND) {
if (value != 0) *value = 0;
return 0;
@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
}
else {
bin_ind = find_table_bin_ind(tab, hash, *key);
if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
goto retry;
if (bin_ind == UNDEFINED_BIN_IND) {
if (value != 0) *value = 0;
return 0;
@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value)
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr)) {
st_hash_t entry_hash = curr_entry_ptr->hash;
st_data_t entry_key = curr_entry_ptr->key;
if (value != 0) *value = curr_entry_ptr->record;
*key = curr_entry_ptr->key;
*key = entry_key;
retry:
if (tab->bins == NULL) {
bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
bin = find_entry(tab, entry_hash, entry_key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
entries = tab->entries;
goto retry;
}
st_assert(bin != UNDEFINED_ENTRY_IND);
st_assert(&entries[bin] == curr_entry_ptr);
curr_entry_ptr = &entries[bin];
}
else {
bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
curr_entry_ptr->key);
bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
entries = tab->entries;
goto retry;
}
st_assert(bin_ind != UNDEFINED_BIN_IND);
st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
- ENTRY_BASE] == curr_entry_ptr);
curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
- ENTRY_BASE];
MARK_BIN_DELETED(tab, bin_ind);
}
st_assert(entry_hash != curr_entry_ptr->hash && entry_key == curr_entry_ptr->key);
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, i);
@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key,
int retval, existing;
st_hash_t hash = do_hash(key, tab);
retry:
entries = tab->entries;
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
existing = bin != UNDEFINED_ENTRY_IND;
entry = &entries[bin];
bin_ind = UNDEFINED_BIN_IND;
}
else {
bin_ind = find_table_bin_ind(tab, hash, key);
if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
goto retry;
existing = bin_ind != UNDEFINED_BIN_IND;
if (existing) {
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
hash = curr_entry_ptr->hash;
retval = (*func)(key, curr_entry_ptr->record, arg, 0);
if (rebuilds_num != tab->rebuilds_num) {
retry:
entries = tab->entries;
packed_p = tab->bins == NULL;
if (packed_p) {
i = find_entry(tab, hash, key);
if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
error_p = i == UNDEFINED_ENTRY_IND;
}
else {
i = find_table_entry_ind(tab, hash, key);
if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
goto retry;
error_p = i == UNDEFINED_ENTRY_IND;
i -= ENTRY_BASE;
}
@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
}
switch (retval) {
case ST_CONTINUE:
break;
break;
case ST_CHECK:
if (check_p)
break;
if (check_p)
break;
case ST_STOP:
#ifdef ST_DEBUG
st_check(tab);
st_check(tab);
#endif
return 0;
case ST_DELETE:
if (packed_p) {
bin = find_entry(tab, hash, curr_entry_ptr->key);
if (bin == UNDEFINED_ENTRY_IND)
break;
}
else {
bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
if (bin_ind == UNDEFINED_BIN_IND)
break;
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
MARK_BIN_DELETED(tab, bin_ind);
}
st_assert(&entries[bin] == curr_entry_ptr);
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, bin);
return 0;
case ST_DELETE: {
st_data_t key = curr_entry_ptr->key;
again:
if (packed_p) {
bin = find_entry(tab, hash, key);
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
goto again;
if (bin == UNDEFINED_ENTRY_IND)
break;
}
else {
bin_ind = find_table_bin_ind(tab, hash, key);
if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
goto again;
if (bin_ind == UNDEFINED_BIN_IND)
break;
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
MARK_BIN_DELETED(tab, bin_ind);
}
curr_entry_ptr = &entries[bin];
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, bin);
#ifdef ST_DEBUG
st_check(tab);
st_check(tab);
#endif
break;
break;
}
}
}
#ifdef ST_DEBUG
@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz)
free(tmp);
}
/* Rehash using linear search. */
static void
/* Rehash using linear search. Return TRUE if we found that the table
was rebuilt. */
static int
st_rehash_linear(st_table *tab)
{
int eq_p, rebuilt_p;
st_index_t i, j;
st_table_entry *p, *q;
if (tab->bins) {
@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab)
q = &tab->entries[j];
if (DELETED_ENTRY_P(q))
continue;
if (PTR_EQUAL(tab, p, q->hash, q->key)) {
DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return TRUE;
if (eq_p) {
st_assert(p < q);
*p = *q;
MARK_ENTRY_DELETED(q);
@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab)
}
}
}
return FALSE;
}
/* Rehash using index */
static void
/* Rehash using index. Return TRUE if we found that the table was
rebuilt. */
static int
st_rehash_indexed(st_table *tab)
{
int eq_p, rebuilt_p;
st_index_t i;
st_index_t const n = bins_size(tab);
unsigned int const size_ind = get_size_ind(tab);
@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab)
set_bin(bins, size_ind, ind, i + ENTRY_BASE);
break;
}
else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
/* duplicated key; delete it */
st_assert(q < p);
q->record = p->record;
MARK_ENTRY_DELETED(p);
tab->num_entries--;
update_range_for_deleted(tab, bin);
break;
}
else {
/* hash collision; skip it */
DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
if (EXPECT(rebuilt_p, 0))
return TRUE;
if (eq_p) {
/* duplicated key; delete it */
st_assert(q < p);
q->record = p->record;
MARK_ENTRY_DELETED(p);
tab->num_entries--;
update_range_for_deleted(tab, bin);
break;
}
else {
/* hash collision; skip it */
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
d++;
ind = hash_bin(ind + d, tab);
d++;
#else
ind = secondary_hash(ind, tab, &peterb);
ind = secondary_hash(ind, tab, &peterb);
#endif
}
}
}
}
}
return FALSE;
}
/* Reconstruct TAB's bins according to TAB's entries. This function
@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab)
static void
st_rehash(st_table *tab)
{
if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
st_rehash_linear(tab);
else
st_rehash_indexed(tab);
int rebuilt_p;
do {
if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
rebuilt_p = st_rehash_linear(tab);
else
rebuilt_p = st_rehash_indexed(tab);
} while (rebuilt_p);
}
#ifdef RUBY