ebpf-for-windows/libs/platform/ebpf_hash_table.c

671 строка
20 KiB
C

// Copyright (c) Microsoft Corporation
// SPDX-License-Identifier: MIT
#include "ebpf_platform.h"
// Buckets contain an array of pointers to values and keys.
// Buckets are immutable once inserted into the hash table and are replaced
// when modified.
// Layout is:
// ebpf_hash_table_t.buckets->ebpf_hash_bucket_header_t.entries->data
// Keys are stored contiguously in ebpf_hash_bucket_header_t for fast
// searching; data is stored separately to prevent read-copy-update semantics
// from causing loss of updates.
// One key/value slot inside a bucket.
// Entries are addressed with a stride of
// EBPF_OFFSET_OF(ebpf_hash_bucket_entry_t, key) + key_size (see
// _ebpf_hash_table_bucket_entry), not sizeof(ebpf_hash_bucket_entry_t), so the
// key bytes of one entry are immediately followed by the next entry.
typedef struct _ebpf_hash_bucket_entry
{
// Pointer to the value, which is allocated separately from the bucket.
uint8_t* data;
// First byte of the key; the key occupies key_size bytes starting here.
uint8_t key[1];
} ebpf_hash_bucket_entry_t;
// Header of a bucket: an entry count followed by the entries themselves in the
// same allocation.
typedef struct _ebpf_hash_bucket_header
{
// Number of entries stored in this bucket.
size_t count;
// First entry; the remaining entries follow it and must be located with
// _ebpf_hash_table_bucket_entry rather than by indexing this array.
_Field_size_(count) ebpf_hash_bucket_entry_t entries[1];
} ebpf_hash_bucket_header_t;
// Hash table state: configuration, caller-supplied callbacks and the array of
// bucket pointers.
struct _ebpf_hash_table
{
// Number of slots in the buckets array; also the modulus applied to hashes.
uint32_t bucket_count;
// Number of keys currently stored; adjusted with interlocked
// increment/decrement when a bucket replacement succeeds.
volatile int32_t entry_count;
// Seed passed to the hash function; set from ebpf_random_uint32() at creation.
uint32_t seed;
// Size of a key in bytes.
size_t key_size;
// Size of a value in bytes; each value is allocated with this size.
size_t value_size;
// Caller-supplied allocator used for the table, buckets and values.
void* (*allocate)(size_t size);
// Caller-supplied release function matching allocate.
void (*free)(void* memory);
// Optional callback. When non-NULL it is called on a key to obtain the bytes
// to hash/compare (data) and their length in bits (num); when NULL the key
// itself is used with a length of key_size * 8 bits.
void (*extract)(_In_ const uint8_t* value, _Outptr_ const uint8_t** data, _Out_ size_t* num);
// Bucket pointers, indexed by hash % bucket_count. A NULL slot is an empty
// bucket. Slots are replaced with a pointer compare-exchange.
_Field_size_(bucket_count) ebpf_hash_bucket_header_t* volatile buckets[1];
};
// Operation requested from _ebpf_hash_table_replace_bucket.
typedef enum _ebpf_hash_bucket_operation
{
// Insert the key if it is absent, otherwise replace its value.
EBPF_HASH_BUCKET_OPERATION_INSERT_OR_UPDATE,
// Insert the key; fails with EBPF_OBJECT_ALREADY_EXISTS if it is present.
EBPF_HASH_BUCKET_OPERATION_INSERT,
// Replace the value of an existing key; fails with EBPF_KEY_NOT_FOUND if it
// is absent.
EBPF_HASH_BUCKET_OPERATION_UPDATE,
// Remove the key; fails with EBPF_KEY_NOT_FOUND if it is absent.
EBPF_HASH_BUCKET_OPERATION_DELETE,
} ebpf_hash_bucket_operation_t;
/**
 * @brief Perform a rotate left on a 32-bit value.
 *
 * The rotation count is reduced modulo 32, so a count of 0 (or any multiple
 * of 32) returns the value unchanged instead of shifting by the full width of
 * the type, which is undefined behavior in C.
 *
 * @param[in] value Value to be rotated.
 * @param[in] count Count of bits to rotate.
 * @return Rotated value.
 */
static inline unsigned long
_ebpf_rol(uint32_t value, size_t count)
{
    const uint32_t shift = (uint32_t)(count & 31);

    // The second mask keeps the right shift in range when shift is 0.
    return (value << shift) | (value >> ((32 - shift) & 31));
}
// Ported from https://github.com/aappleby/smhasher
// Quote from https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L2
// "MurmurHash3 was written by Austin Appleby, and is placed in the public domain."
/**
 * @brief An implementation of the murmur3_32 hash function. This is a high
 * performance non-cryptographic hash function.
 *
 * The key length is given in bits. Whole 4-byte groups are mixed first, then
 * the (up to three) trailing whole bytes, and finally, when the length is not
 * a multiple of 8, the high-order bits of the last partial byte.
 *
 * @param[in] key Pointer to key to hash.
 * @param[in] length_in_bits Length of key to hash, in bits.
 * @param[in] seed Seed to randomize hash.
 * @return Hash of key.
 */
unsigned long
_ebpf_murmur3_32(_In_ const uint8_t* key, size_t length_in_bits, uint32_t seed)
{
    uint32_t c1 = 0xcc9e2d51;
    uint32_t c2 = 0x1b873593;
    uint32_t r1 = 15;
    uint32_t r2 = 13;
    uint32_t m = 5;
    uint32_t n = 0xe6546b64;
    uint32_t hash = seed;
    uint32_t length_in_bytes = ((uint32_t)length_in_bits / 8);
    uint32_t remaining_bits = length_in_bits % 8;

    // Mix every complete 4-byte group.
    for (size_t index = 0; (length_in_bytes - index) > 3; index += 4) {
        // The key has no alignment guarantee, so copy the group out instead of
        // dereferencing a casted pointer.
        uint32_t k;
        memcpy(&k, key + index, sizeof(k));
        k *= c1;
        k = _ebpf_rol(k, r1);
        k *= c2;
        hash ^= k;
        hash = _ebpf_rol(hash, r2);
        hash *= m;
        hash += n;
    }

    // Fold the trailing whole bytes, first byte ending up most significant.
    unsigned long remainder = 0;
    for (size_t index = length_in_bytes & (~3); index < length_in_bytes; index++) {
        remainder <<= 8;
        remainder |= key[index];
    }

    // Fold in the used (high-order) bits of the final partial byte, if any.
    if (remaining_bits) {
        uint8_t bits = key[length_in_bytes];
        bits >>= (8 - remaining_bits);
        remainder <<= 8;
        remainder |= bits;
    }

    remainder *= c1;
    remainder = _ebpf_rol(remainder, r1);
    remainder *= c2;
    hash ^= remainder;

    // Final mixing.
    hash ^= (uint32_t)length_in_bytes;
    hash *= 0x85ebca6b;
    hash ^= (hash >> r2);
    hash *= 0xc2b2ae35;
    hash ^= (hash >> 16);
    return hash;
}
/**
 * @brief Given two potentially non-comparable key values, extract the key and
 * compare them.
 *
 * When the table has an extract callback it is used to obtain the bytes and
 * bit length of each key; otherwise the keys are compared directly over
 * key_size bytes. Keys of different bit length order by length. Keys of equal
 * length are compared over the whole bytes first and then over the high-order
 * bits of the trailing partial byte, if any.
 *
 * @param[in] hash_table Hash table the keys belong to.
 * @param[in] key_a First key.
 * @param[in] key_b Second key.
 * @retval -1 if key_a < key_b
 * @retval 0 if key_a == key_b
 * @retval 1 if key_a > key_b
 */
static int
_ebpf_hash_table_compare(_In_ const ebpf_hash_table_t* hash_table, _In_ const uint8_t* key_a, _In_ const uint8_t* key_b)
{
    size_t length_a_in_bits;
    size_t length_b_in_bits;
    const uint8_t* data_a;
    const uint8_t* data_b;

    if (hash_table->extract) {
        hash_table->extract(key_a, &data_a, &length_a_in_bits);
        hash_table->extract(key_b, &data_b, &length_b_in_bits);
    } else {
        length_a_in_bits = hash_table->key_size * 8;
        data_a = key_a;
        length_b_in_bits = hash_table->key_size * 8;
        data_b = key_b;
    }

    if (length_a_in_bits < length_b_in_bits) {
        return -1;
    }
    if (length_a_in_bits > length_b_in_bits) {
        return 1;
    }

    // Both lengths are equal from here on.
    const size_t whole_bytes = length_a_in_bits / 8;
    const size_t trailing_bits = length_a_in_bits % 8;

    // memcmp only guarantees the sign of its result; normalize it so the
    // documented -1/0/1 contract holds.
    int cmp_result = memcmp(data_a, data_b, whole_bytes);
    if (cmp_result != 0) {
        return (cmp_result < 0) ? -1 : 1;
    }

    // Length ends on a byte boundary: nothing more to compare.
    if (trailing_bits == 0) {
        return 0;
    }

    // Compare the remaining high-order bits of the last partial byte.
    const uint8_t remainder_a = (uint8_t)(data_a[whole_bytes] >> (8 - trailing_bits));
    const uint8_t remainder_b = (uint8_t)(data_b[whole_bytes] >> (8 - trailing_bits));
    if (remainder_a < remainder_b) {
        return -1;
    }
    if (remainder_a > remainder_b) {
        return 1;
    }
    return 0;
}
/**
 * @brief Compute the hash of a key, first running it through the table's
 * extract callback when one is configured.
 *
 * Without an extract callback the key itself is hashed over key_size * 8
 * bits. The table's seed is always used.
 *
 * @param[in] hash_table Hash table the key belongs to.
 * @param[in] key Key to hash.
 * @return Hash of key.
 */
static uint32_t
_ebpf_hash_table_compute_hash(_In_ const ebpf_hash_table_t* hash_table, _In_ const uint8_t* key)
{
    // Defaults describe the raw key; the extract callback overrides both.
    const uint8_t* key_bytes = key;
    size_t key_bits = hash_table->key_size * 8;

    if (hash_table->extract) {
        hash_table->extract(key, &key_bytes, &key_bits);
    }

    return _ebpf_murmur3_32(key_bytes, key_bits, hash_table->seed);
}
/**
 * @brief Locate an entry inside a bucket.
 *
 * Entries are laid out back to back with a stride of
 * EBPF_OFFSET_OF(ebpf_hash_bucket_entry_t, key) + key_size bytes, so the
 * address is computed by hand rather than by indexing bucket->entries. No
 * bounds check against bucket->count is performed.
 *
 * @param [in] key_size Size of key.
 * @param [in] bucket Pointer to start of the bucket.
 * @param [in] index Index into the bucket.
 * @return Pointer to the ebpf_hash_bucket_entry_t.
 */
ebpf_hash_bucket_entry_t*
_ebpf_hash_table_bucket_entry(size_t key_size, _In_ ebpf_hash_bucket_header_t* bucket, size_t index)
{
    const size_t stride = EBPF_OFFSET_OF(ebpf_hash_bucket_entry_t, key) + key_size;
    uint8_t* const first_entry = (uint8_t*)bucket->entries;

    return (ebpf_hash_bucket_entry_t*)&first_entry[index * stride];
}
/**
* @brief Perform an atomic replacement of a bucket in the hash table.
* Operations include insert, update and delete of elements.
*
* A candidate bucket is built as a copy of the current bucket with the
* requested change applied and is then published with a pointer
* compare-exchange. If the bucket pointer changed in the meantime, the
* candidate is freed and the whole attempt is repeated against the new
* current bucket. On success the replaced bucket and the previous value of
* the key (if any) are passed to hash_table->free.
*
* @param[in] hash_table Hash table to update.
* @param[in] key Key to operate on.
* @param[in] value Value to be inserted or NULL.
* @param[in] operation Operation to perform.
* @retval EBPF_SUCCESS The operation succeeded.
* @retval EBPF_KEY_NOT_FOUND The specified key is not present in the bucket
* and the operation is an update or delete.
* @retval EBPF_OBJECT_ALREADY_EXISTS The specified key is already present in
* the bucket and the operation is an insert.
* @retval EBPF_NO_MEMORY Insufficient memory to construct new bucket or value.
*/
static ebpf_result_t
_ebpf_hash_table_replace_bucket(
_In_ ebpf_hash_table_t* hash_table,
_In_ const uint8_t* key,
_In_opt_ const uint8_t* value,
ebpf_hash_bucket_operation_t operation)
{
ebpf_result_t result;
size_t index;
// Stride between entries inside a bucket.
size_t entry_size = EBPF_OFFSET_OF(ebpf_hash_bucket_entry_t, key) + hash_table->key_size;
uint8_t* old_data = NULL;
uint8_t* new_data = NULL;
// delete_data / delete_bucket track what must be freed at Done.
uint8_t* delete_data = NULL;
uint32_t hash;
ebpf_hash_bucket_header_t* old_bucket = NULL;
ebpf_hash_bucket_header_t* new_bucket = NULL;
ebpf_hash_bucket_header_t* delete_bucket = NULL;
hash = _ebpf_hash_table_compute_hash(hash_table, key);
// Every operation except delete needs a freshly allocated copy of the value.
switch (operation) {
case EBPF_HASH_BUCKET_OPERATION_INSERT_OR_UPDATE:
case EBPF_HASH_BUCKET_OPERATION_INSERT:
case EBPF_HASH_BUCKET_OPERATION_UPDATE:
new_data = hash_table->allocate(hash_table->value_size);
if (!new_data) {
result = EBPF_NO_MEMORY;
goto Done;
}
delete_data = new_data;
// NOTE(review): when value is NULL the new value buffer keeps whatever
// content allocate returned; presumably allocate zeroes memory - confirm.
if (value) {
memcpy(new_data, value, hash_table->value_size);
}
break;
case EBPF_HASH_BUCKET_OPERATION_DELETE:
break;
}
// Retry until the bucket pointer is swapped without interference.
for (;;) {
// MAXSIZE_T means "key not found in the current bucket".
size_t old_data_index = MAXSIZE_T;
size_t old_bucket_count = 0;
size_t new_bucket_count = 0;
// Reset per-attempt state: no candidate bucket yet, and the new value
// is still owned by this function until the swap succeeds.
new_bucket = NULL;
old_data = NULL;
delete_bucket = new_bucket;
delete_data = new_data;
// Capture current bucket pointer.
old_bucket = hash_table->buckets[hash % hash_table->bucket_count];
// If the old_bucket exists, capture its count.
if (old_bucket) {
old_bucket_count = old_bucket->count;
}
// Find the old key index if it exists.
if (old_bucket) {
for (index = 0; index < old_bucket_count; index++) {
ebpf_hash_bucket_entry_t* entry =
_ebpf_hash_table_bucket_entry(hash_table->key_size, old_bucket, index);
ebpf_assert(entry);
if (_ebpf_hash_table_compare(hash_table, key, entry->key) == 0) {
// Remember the existing value and its position so the entry can
// be left out of the new bucket.
old_data = entry->data;
old_data_index = index;
break;
}
}
}
// new_bucket_count starts at old_bucket_count and becomes +1 when a new
// key is inserted, -1 on delete, or stays unchanged when an existing
// key is updated.
new_bucket_count = old_bucket_count;
switch (operation) {
// Fail if the key is not found and this is a update or delete.
case EBPF_HASH_BUCKET_OPERATION_UPDATE:
if (old_data_index == MAXSIZE_T) {
result = EBPF_KEY_NOT_FOUND;
goto Done;
}
break;
case EBPF_HASH_BUCKET_OPERATION_DELETE:
if (old_data_index == MAXSIZE_T) {
result = EBPF_KEY_NOT_FOUND;
goto Done;
} else {
new_bucket_count--;
}
break;
// Permit it if this is an insert or insert_or_update.
case EBPF_HASH_BUCKET_OPERATION_INSERT:
if (old_data_index != MAXSIZE_T) {
result = EBPF_OBJECT_ALREADY_EXISTS;
goto Done;
} else {
new_bucket_count++;
}
break;
case EBPF_HASH_BUCKET_OPERATION_INSERT_OR_UPDATE:
if (old_data_index == MAXSIZE_T) {
new_bucket_count++;
}
break;
}
ebpf_assert(new_bucket_count < MAXUINT32);
// A bucket that would become empty is represented by a NULL pointer.
if (new_bucket_count) {
// NOTE(review): new_bucket->count is read below without being set
// here; presumably hash_table->allocate returns zeroed memory - confirm.
new_bucket = hash_table->allocate(entry_size * new_bucket_count + sizeof(ebpf_hash_bucket_header_t));
if (!new_bucket) {
result = EBPF_NO_MEMORY;
goto Done;
}
delete_bucket = new_bucket;
// Copy everything except old entry over.
for (index = 0; index < old_bucket_count; index++) {
ebpf_hash_bucket_entry_t* old_entry =
_ebpf_hash_table_bucket_entry(hash_table->key_size, old_bucket, index);
ebpf_hash_bucket_entry_t* new_entry =
_ebpf_hash_table_bucket_entry(hash_table->key_size, new_bucket, new_bucket->count);
if (index == old_data_index) {
continue;
}
memcpy(new_entry, old_entry, entry_size);
new_bucket->count++;
}
// If new_data exists, add it to the end.
if (new_data) {
ebpf_assert(new_bucket_count > new_bucket->count);
ebpf_hash_bucket_entry_t* new_entry =
_ebpf_hash_table_bucket_entry(hash_table->key_size, new_bucket, new_bucket->count);
new_entry->data = new_data;
memcpy(new_entry->key, key, hash_table->key_size);
new_bucket->count++;
}
}
ebpf_assert(
(new_bucket == NULL && new_bucket_count == 0) || // No new bucket.
(new_bucket_count == new_bucket->count)); // New bucket is full.
// Publish the new bucket only if the slot still holds the bucket that was
// copied from.
if (ebpf_interlocked_compare_exchange_pointer(
(void* volatile*)&hash_table->buckets[hash % hash_table->bucket_count], new_bucket, old_bucket) ==
old_bucket) {
// Success: the replaced bucket and the replaced value are now the
// items to free; the new bucket and value belong to the table.
delete_bucket = old_bucket;
delete_data = old_data;
break;
} else {
// Delete new_bucket and try again.
hash_table->free(new_bucket);
}
}
// Keep entry_count in step with the change that was just published.
switch (operation) {
case EBPF_HASH_BUCKET_OPERATION_INSERT:
ebpf_interlocked_increment_int32(&hash_table->entry_count);
break;
case EBPF_HASH_BUCKET_OPERATION_INSERT_OR_UPDATE:
// Only a newly added key changes the count.
if (!old_data)
ebpf_interlocked_increment_int32(&hash_table->entry_count);
break;
case EBPF_HASH_BUCKET_OPERATION_UPDATE:
break;
case EBPF_HASH_BUCKET_OPERATION_DELETE:
ebpf_interlocked_decrement_int32(&hash_table->entry_count);
break;
}
result = EBPF_SUCCESS;
Done:
// On success this frees the replaced bucket and value; on failure it frees
// the unused new value. Either pointer may be NULL here, so
// hash_table->free is expected to accept NULL.
hash_table->free(delete_bucket);
hash_table->free(delete_data);
return result;
}
/**
 * @brief Allocate and initialize a hash table.
 *
 * The table is allocated with the supplied allocate function and sized to
 * hold bucket_count bucket pointers. The allocate, free and extract callbacks
 * are stored in the table and used by all later operations.
 *
 * NOTE(review): the bucket array is not cleared here; presumably allocate
 * returns zeroed memory - confirm against the allocators passed by callers.
 *
 * @param[out] hash_table Receives the new table on success.
 * @param[in] allocate Function used to allocate memory.
 * @param[in] free Function used to release memory obtained from allocate.
 * @param[in] key_size Size of a key in bytes.
 * @param[in] value_size Size of a value in bytes.
 * @param[in] bucket_count Number of buckets. Must be non-zero and fit in a
 * uint32_t.
 * @param[in] extract Optional callback that yields the bytes and bit length
 * to hash and compare for a key, or NULL to use the key as is.
 * @retval EBPF_SUCCESS The table was created.
 * @retval EBPF_INVALID_ARGUMENT bucket_count is zero or does not fit in a
 * uint32_t.
 * @retval EBPF_NO_MEMORY The table could not be allocated.
 * @return Any error reported by the safe size arithmetic helpers when the
 * table size overflows.
 */
ebpf_result_t
ebpf_hash_table_create(
    _Out_ ebpf_hash_table_t** hash_table,
    _In_ void* (*allocate)(size_t size),
    _In_ void (*free)(void* memory),
    size_t key_size,
    size_t value_size,
    size_t bucket_count,
    _In_opt_ void (*extract)(
        _In_ const uint8_t* value,
        _Outptr_result_buffer_((*length_in_bits + 7) / 8) const uint8_t** data,
        _Out_ size_t* length_in_bits))
{
    ebpf_result_t retval;
    ebpf_hash_table_t* table = NULL;
    size_t table_size = 0;

    // bucket_count is used as the modulus for every lookup and is stored in a
    // uint32_t field: zero would divide by zero later, and larger values would
    // be silently truncated by the cast below.
    if (bucket_count == 0 || bucket_count > UINT32_MAX) {
        retval = EBPF_INVALID_ARGUMENT;
        goto Done;
    }

    // table_size = offsetof(buckets) + bucket_count * sizeof(bucket pointer),
    // computed with overflow checks.
    retval = ebpf_safe_size_t_multiply(sizeof(ebpf_hash_bucket_header_t*), bucket_count, &table_size);
    if (retval != EBPF_SUCCESS) {
        goto Done;
    }
    retval = ebpf_safe_size_t_add(table_size, EBPF_OFFSET_OF(ebpf_hash_table_t, buckets), &table_size);
    if (retval != EBPF_SUCCESS) {
        goto Done;
    }

    table = allocate(table_size);
    if (table == NULL) {
        retval = EBPF_NO_MEMORY;
        goto Done;
    }

    table->key_size = key_size;
    table->value_size = value_size;
    table->allocate = allocate;
    table->free = free;
    table->bucket_count = (uint32_t)bucket_count;
    table->entry_count = 0;
    table->seed = ebpf_random_uint32();
    table->extract = extract;

    *hash_table = table;
    retval = EBPF_SUCCESS;

Done:
    return retval;
}
/**
 * @brief Release a hash table together with every bucket and value it holds.
 *
 * Each stored value, each bucket and finally the table itself are passed to
 * the table's free function. A NULL table is ignored.
 *
 * @param[in] hash_table Hash table to destroy, or NULL.
 */
void
ebpf_hash_table_destroy(_In_opt_ _Post_ptr_invalid_ ebpf_hash_table_t* hash_table)
{
    if (hash_table == NULL) {
        return;
    }

    for (size_t slot = 0; slot < hash_table->bucket_count; slot++) {
        ebpf_hash_bucket_header_t* bucket = (ebpf_hash_bucket_header_t*)hash_table->buckets[slot];
        if (bucket == NULL) {
            continue;
        }

        // Values are separate allocations, so release them before the bucket.
        for (size_t position = 0; position < bucket->count; position++) {
            ebpf_hash_bucket_entry_t* entry = _ebpf_hash_table_bucket_entry(hash_table->key_size, bucket, position);
            hash_table->free(entry->data);
        }

        hash_table->free(bucket);
        hash_table->buckets[slot] = NULL;
    }

    hash_table->free(hash_table);
}
/**
 * @brief Look up a key and return a pointer to its stored value.
 *
 * The pointer written to *value is the value pointer held by the table's
 * bucket entry; no copy is made.
 *
 * @param[in] hash_table Hash table to search.
 * @param[in] key Key to look up.
 * @param[out] value Receives the pointer to the value on success.
 * @retval EBPF_SUCCESS The key was found.
 * @retval EBPF_INVALID_ARGUMENT hash_table, key or value is NULL.
 * @retval EBPF_KEY_NOT_FOUND The key is not present in the table.
 */
ebpf_result_t
ebpf_hash_table_find(_In_ ebpf_hash_table_t* hash_table, _In_ const uint8_t* key, _Outptr_ uint8_t** value)
{
    ebpf_result_t retval;
    uint32_t hash;
    uint8_t* data = NULL;
    size_t index;
    ebpf_hash_bucket_header_t* bucket;

    // value is written through on success, so validate it alongside the other
    // pointer arguments.
    if (!hash_table || !key || !value) {
        retval = EBPF_INVALID_ARGUMENT;
        goto Done;
    }

    hash = _ebpf_hash_table_compute_hash(hash_table, key);

    // Read the bucket pointer once and search that bucket.
    bucket = hash_table->buckets[hash % hash_table->bucket_count];
    if (!bucket) {
        retval = EBPF_KEY_NOT_FOUND;
        goto Done;
    }

    for (index = 0; index < bucket->count; index++) {
        ebpf_hash_bucket_entry_t* entry = _ebpf_hash_table_bucket_entry(hash_table->key_size, bucket, index);
        if (_ebpf_hash_table_compare(hash_table, key, entry->key) == 0) {
            data = entry->data;
            break;
        }
    }

    if (!data) {
        retval = EBPF_KEY_NOT_FOUND;
        goto Done;
    }

    *value = data;
    retval = EBPF_SUCCESS;

Done:
    return retval;
}
/**
 * @brief Insert or replace a key/value pair.
 *
 * The public operation is translated to the matching bucket operation and
 * forwarded to _ebpf_hash_table_replace_bucket, whose result is returned.
 *
 * @param[in] hash_table Hash table to update.
 * @param[in] key Key to insert or replace.
 * @param[in] value Value to store, or NULL.
 * @param[in] operation EBPF_HASH_TABLE_OPERATION_ANY (insert or replace),
 * EBPF_HASH_TABLE_OPERATION_INSERT (insert only) or
 * EBPF_HASH_TABLE_OPERATION_REPLACE (replace only).
 * @retval EBPF_INVALID_ARGUMENT hash_table or key is NULL, or operation is
 * not one of the values above.
 * @return Otherwise, the result of the bucket replacement.
 */
ebpf_result_t
ebpf_hash_table_update(
    _In_ ebpf_hash_table_t* hash_table,
    _In_ const uint8_t* key,
    _In_opt_ const uint8_t* value,
    ebpf_hash_table_operations_t operation)
{
    ebpf_hash_bucket_operation_t bucket_operation;

    if (hash_table == NULL || key == NULL) {
        return EBPF_INVALID_ARGUMENT;
    }

    if (operation == EBPF_HASH_TABLE_OPERATION_ANY) {
        bucket_operation = EBPF_HASH_BUCKET_OPERATION_INSERT_OR_UPDATE;
    } else if (operation == EBPF_HASH_TABLE_OPERATION_INSERT) {
        bucket_operation = EBPF_HASH_BUCKET_OPERATION_INSERT;
    } else if (operation == EBPF_HASH_TABLE_OPERATION_REPLACE) {
        bucket_operation = EBPF_HASH_BUCKET_OPERATION_UPDATE;
    } else {
        return EBPF_INVALID_ARGUMENT;
    }

    return _ebpf_hash_table_replace_bucket(hash_table, key, value, bucket_operation);
}
/**
 * @brief Remove a key and its value from the hash table.
 *
 * @param[in] hash_table Hash table to update.
 * @param[in] key Key to remove.
 * @retval EBPF_INVALID_ARGUMENT hash_table or key is NULL.
 * @return Otherwise, the result of the delete bucket operation.
 */
ebpf_result_t
ebpf_hash_table_delete(_In_ ebpf_hash_table_t* hash_table, _In_ const uint8_t* key)
{
    if (hash_table == NULL || key == NULL) {
        return EBPF_INVALID_ARGUMENT;
    }

    return _ebpf_hash_table_replace_bucket(hash_table, key, NULL, EBPF_HASH_BUCKET_OPERATION_DELETE);
}
/**
 * @brief Find the entry that follows previous_key in bucket order and return
 * pointers to its key and, optionally, its value.
 *
 * With a NULL previous_key the first entry of the first non-empty bucket is
 * returned. Otherwise the scan starts at the bucket previous_key hashes to,
 * looks for previous_key, and returns the entry that comes after it, moving
 * on to later buckets when needed. If previous_key is never encountered, no
 * entry is returned.
 *
 * @param[in] hash_table Hash table to enumerate.
 * @param[in] previous_key Key to continue from, or NULL to start.
 * @param[out] next_key_pointer Receives a pointer to the key stored in the
 * table.
 * @param[out] value Optionally receives the value pointer of that entry.
 * @retval EBPF_SUCCESS An entry was found.
 * @retval EBPF_INVALID_ARGUMENT hash_table or next_key_pointer is NULL.
 * @retval EBPF_NO_MORE_KEYS No following entry exists.
 */
ebpf_result_t
ebpf_hash_table_next_key_pointer_and_value(
    _In_ ebpf_hash_table_t* hash_table,
    _In_opt_ const uint8_t* previous_key,
    _Outptr_ uint8_t** next_key_pointer,
    _Outptr_opt_ uint8_t** value)
{
    ebpf_hash_bucket_entry_t* successor = NULL;
    bool previous_seen = false;
    uint32_t start_hash = 0;

    if (hash_table == NULL || next_key_pointer == NULL) {
        return EBPF_INVALID_ARGUMENT;
    }

    // Without a previous key the scan starts at bucket 0.
    if (previous_key != NULL) {
        start_hash = _ebpf_hash_table_compute_hash(hash_table, previous_key);
    }

    for (size_t slot = start_hash % hash_table->bucket_count; slot < hash_table->bucket_count; slot++) {
        ebpf_hash_bucket_header_t* bucket = hash_table->buckets[slot];

        // Empty buckets hold nothing to return.
        if (bucket == NULL) {
            continue;
        }

        // Starting an enumeration: take the first entry of this bucket.
        if (previous_key == NULL) {
            successor = _ebpf_hash_table_bucket_entry(hash_table->key_size, bucket, 0);
            break;
        }

        for (size_t position = 0; position < bucket->count; position++) {
            ebpf_hash_bucket_entry_t* entry = _ebpf_hash_table_bucket_entry(hash_table->key_size, bucket, position);
            if (entry == NULL) {
                return EBPF_INVALID_ARGUMENT;
            }

            // Once the previous key has been passed, the very next entry wins.
            if (previous_seen) {
                successor = entry;
                break;
            }

            if (_ebpf_hash_table_compare(hash_table, previous_key, entry->key) == 0) {
                previous_seen = true;
            }
        }

        if (successor != NULL) {
            break;
        }
    }

    if (successor == NULL) {
        return EBPF_NO_MORE_KEYS;
    }

    if (value != NULL) {
        *value = successor->data;
    }
    *next_key_pointer = successor->key;
    return EBPF_SUCCESS;
}
/**
 * @brief Find the entry that follows previous_key and copy its key into a
 * caller-provided buffer.
 *
 * The lookup is delegated to ebpf_hash_table_next_key_pointer_and_value. On
 * success key_size bytes are copied into next_key; on failure next_key is
 * left untouched.
 *
 * @param[in] hash_table Hash table to enumerate.
 * @param[in] previous_key Key to continue from, or NULL to start.
 * @param[out] next_key Buffer of at least key_size bytes receiving the key.
 * @param[in,out] next_value Optionally receives the value pointer.
 * @return Result of the underlying lookup.
 */
ebpf_result_t
ebpf_hash_table_next_key_and_value(
    _In_ ebpf_hash_table_t* hash_table,
    _In_opt_ const uint8_t* previous_key,
    _Out_ uint8_t* next_key,
    _Inout_opt_ uint8_t** next_value)
{
    uint8_t* stored_key;
    ebpf_result_t result =
        ebpf_hash_table_next_key_pointer_and_value(hash_table, previous_key, &stored_key, next_value);

    if (result != EBPF_SUCCESS) {
        return result;
    }

    memcpy(next_key, stored_key, hash_table->key_size);
    return result;
}
/**
 * @brief Find the key that follows previous_key and copy it into next_key.
 *
 * Same as ebpf_hash_table_next_key_and_value with no value requested.
 *
 * @param[in] hash_table Hash table to enumerate.
 * @param[in] previous_key Key to continue from, or NULL to start.
 * @param[out] next_key Buffer receiving the next key.
 * @return Result of ebpf_hash_table_next_key_and_value.
 */
ebpf_result_t
ebpf_hash_table_next_key(
    _In_ ebpf_hash_table_t* hash_table, _In_opt_ const uint8_t* previous_key, _Out_ uint8_t* next_key)
{
    // The caller is not interested in the value.
    uint8_t** const no_value = NULL;

    return ebpf_hash_table_next_key_and_value(hash_table, previous_key, next_key, no_value);
}
/**
 * @brief Return the table's current entry counter.
 *
 * The counter is read once without any further synchronization and converted
 * to size_t.
 *
 * @param[in] hash_table Hash table to query.
 * @return Value of the table's entry_count field.
 */
size_t
ebpf_hash_table_key_count(_In_ ebpf_hash_table_t* hash_table)
{
    const int32_t entries = hash_table->entry_count;

    return (size_t)entries;
}