Signed-off-by: Alan Jowett <alan.jowett@microsoft.com>
This commit is contained in:
Alan Jowett 2024-11-19 15:26:37 -08:00
Родитель 0a14ab3c68
Коммит 60e2ad55e1
4 изменённых файлов: 230 добавлений и 170 удалений

Просмотреть файл

@ -4266,15 +4266,28 @@ _ebpf_ring_buffer_map_async_query_completion(_Inout_ void* completion_context) N
break;
}
int callback_result = subscription->sample_callback(
subscription->sample_callback_context,
const_cast<void*>(reinterpret_cast<const void*>(record->data)),
record->header.length - EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
if (callback_result != 0) {
break;
long size;
for (;;) {
size = ReadAcquire(&record->size);
if (size & EBPF_RING_BUFFER_RECORD_FLAG_LOCKED) {
// The record is being written to by the producer.
// Wait for the producer to finish writing.
YieldProcessor();
} else {
break;
}
}
consumer += record->header.length;
if (!(size & EBPF_RING_BUFFER_RECORD_FLAG_DISCARDED)) {
int callback_result = subscription->sample_callback(
subscription->sample_callback_context,
const_cast<void*>(reinterpret_cast<const void*>(record->data)),
size - EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
if (callback_result != 0) {
break;
}
}
consumer += size;
}
}

Просмотреть файл

@ -8,80 +8,13 @@
typedef struct _ebpf_ring_buffer
{
ebpf_lock_t lock;
size_t length;
size_t consumer_offset;
size_t producer_offset;
int64_t length;
volatile int64_t consumer_offset;
volatile int64_t producer_offset;
uint8_t* shared_buffer;
ebpf_ring_descriptor_t* ring_descriptor;
} ebpf_ring_buffer_t;
inline static size_t
_ring_get_length(_In_ const ebpf_ring_buffer_t* ring)
{
return ring->length;
}
inline static size_t
_ring_get_producer_offset(_In_ const ebpf_ring_buffer_t* ring)
{
return ring->producer_offset % ring->length;
}
inline static size_t
_ring_get_consumer_offset(_In_ const ebpf_ring_buffer_t* ring)
{
return ring->consumer_offset % ring->length;
}
inline static size_t
_ring_get_used_capacity(_In_ const ebpf_ring_buffer_t* ring)
{
ebpf_assert(ring->producer_offset >= ring->consumer_offset);
return ring->producer_offset - ring->consumer_offset;
}
inline static void
_ring_advance_producer_offset(_Inout_ ebpf_ring_buffer_t* ring, size_t length)
{
ring->producer_offset += length;
}
inline static void
_ring_advance_consumer_offset(_Inout_ ebpf_ring_buffer_t* ring, size_t length)
{
ring->consumer_offset += length;
}
inline static _Ret_notnull_ ebpf_ring_buffer_record_t*
_ring_record_at_offset(_In_ const ebpf_ring_buffer_t* ring, size_t offset)
{
return (ebpf_ring_buffer_record_t*)&ring->shared_buffer[offset % ring->length];
}
inline static _Ret_notnull_ ebpf_ring_buffer_record_t*
_ring_next_consumer_record(_In_ const ebpf_ring_buffer_t* ring)
{
return _ring_record_at_offset(ring, _ring_get_consumer_offset(ring));
}
inline static _Ret_maybenull_ ebpf_ring_buffer_record_t*
_ring_buffer_acquire_record(_Inout_ ebpf_ring_buffer_t* ring, size_t requested_length)
{
ebpf_ring_buffer_record_t* record = NULL;
requested_length += EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data);
size_t remaining_space = ring->length - (ring->producer_offset - ring->consumer_offset);
if (remaining_space > requested_length) {
record = _ring_record_at_offset(ring, _ring_get_producer_offset(ring));
_ring_advance_producer_offset(ring, requested_length);
record->header.length = (uint32_t)requested_length;
record->header.locked = 1;
record->header.discarded = 0;
}
return record;
}
_Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_create(_Outptr_ ebpf_ring_buffer_t** ring, size_t capacity)
{
@ -135,78 +68,68 @@ _Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_output(_Inout_ ebpf_ring_buffer_t* ring, _In_reads_bytes_(length) uint8_t* data, size_t length)
{
ebpf_result_t result;
ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
ebpf_ring_buffer_record_t* record = _ring_buffer_acquire_record(ring, length);
uint8_t* buffer;
if (record == NULL) {
result = EBPF_OUT_OF_SPACE;
goto Done;
result = ebpf_ring_buffer_reserve(ring, &buffer, length);
if (result != EBPF_SUCCESS) {
return result;
}
record->header.discarded = 0;
record->header.locked = 0;
memcpy(record->data, data, length);
result = EBPF_SUCCESS;
Done:
ebpf_lock_unlock(&ring->lock, state);
return result;
memcpy(buffer, data, length);
return ebpf_ring_buffer_submit(buffer);
}
void
ebpf_ring_buffer_query(_In_ ebpf_ring_buffer_t* ring, _Out_ size_t* consumer, _Out_ size_t* producer)
{
ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
*consumer = ring->consumer_offset;
*producer = ring->producer_offset;
ebpf_lock_unlock(&ring->lock, state);
*consumer = (size_t)ReadAcquire64(&ring->consumer_offset);
*producer = (size_t)ReadAcquire64(&ring->producer_offset);
}
_Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_return(_Inout_ ebpf_ring_buffer_t* ring, size_t length)
{
EBPF_LOG_ENTRY();
ebpf_result_t result;
ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
size_t local_length = length;
size_t offset = _ring_get_consumer_offset(ring);
int64_t length_remaining = (int64_t)length;
if ((length > _ring_get_length(ring)) || length > _ring_get_used_capacity(ring)) {
EBPF_LOG_MESSAGE_UINT64_UINT64(
EBPF_TRACELOG_LEVEL_ERROR,
EBPF_TRACELOG_KEYWORD_MAP,
"ebpf_ring_buffer_return: Buffer too large",
ring->producer_offset,
ring->consumer_offset);
result = EBPF_INVALID_ARGUMENT;
goto Done;
}
for (;;) {
int64_t producer_offset = ReadAcquire64(&ring->producer_offset);
int64_t consumer_offset = ReadAcquire64(&ring->consumer_offset);
// Verify count.
while (local_length != 0) {
ebpf_ring_buffer_record_t* record = _ring_record_at_offset(ring, offset);
if (local_length < record->header.length) {
if (length_remaining > (producer_offset - consumer_offset)) {
return EBPF_INVALID_ARGUMENT;
}
if (length_remaining == 0) {
break;
}
offset += record->header.length;
local_length -= record->header.length;
}
// Did it end on a record boundary?
if (local_length != 0) {
EBPF_LOG_MESSAGE_UINT64(
EBPF_TRACELOG_LEVEL_ERROR,
EBPF_TRACELOG_KEYWORD_MAP,
"ebpf_ring_buffer_return: Invalid buffer length",
local_length);
result = EBPF_INVALID_ARGUMENT;
goto Done;
ebpf_ring_buffer_record_t* record =
(ebpf_ring_buffer_record_t*)(ring->shared_buffer + consumer_offset % ring->length);
long size = ReadAcquire(&record->size);
// Can't return a locked record.
if (size & EBPF_RING_BUFFER_RECORD_FLAG_LOCKED) {
continue;
}
long data_size = size - EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data);
memset(record->data, 0, data_size);
WriteRelease(&record->size, 0);
length_remaining -= size;
if (consumer_offset + size > producer_offset) {
return EBPF_INVALID_ARGUMENT;
}
WriteRelease64(&ring->consumer_offset, consumer_offset + size);
}
_ring_advance_consumer_offset(ring, length);
result = EBPF_SUCCESS;
Done:
ebpf_lock_unlock(&ring->lock, state);
EBPF_RETURN_RESULT(result);
EBPF_RETURN_RESULT(EBPF_SUCCESS);
}
_Must_inspect_result_ ebpf_result_t
@ -224,56 +147,90 @@ _Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_reserve(
_Inout_ ebpf_ring_buffer_t* ring, _Outptr_result_bytebuffer_(length) uint8_t** data, size_t length)
{
ebpf_result_t result;
ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
ebpf_ring_buffer_record_t* record = _ring_buffer_acquire_record(ring, length);
if (record == NULL) {
result = EBPF_INVALID_ARGUMENT;
goto Done;
for (;;) {
int64_t producer_offset = ReadAcquire64(&ring->producer_offset);
// Record points to the next record to allocate.
ebpf_ring_buffer_record_t* record =
(ebpf_ring_buffer_record_t*)(ring->shared_buffer + producer_offset % ring->length);
int64_t remaining_space = ring->length - (producer_offset - ReadAcquire64(&ring->consumer_offset));
long effective_length = (long)length + 4;
if (remaining_space < effective_length) {
return EBPF_NO_MEMORY;
}
// Check if locked.
if (record->size != 0) {
// If locked, pause then try again.
// Use _mm_pause() on x86 and __yield() on ARM.
#if defined(_M_X64) || defined(_M_IX86)
_mm_pause();
#else
__yield();
#endif
continue;
}
if (InterlockedCompareExchange(&record->size, EBPF_RING_BUFFER_RECORD_FLAG_LOCKED, 0) != 0) {
continue;
}
// Check if the producer offset changed after we read it.
if (ReadAcquire64(&ring->producer_offset) != producer_offset) {
// Clear the lock bit
InterlockedAnd(&record->size, ~(EBPF_RING_BUFFER_RECORD_FLAG_LOCKED));
continue;
}
remaining_space = ring->length - (producer_offset - ReadAcquire64(&ring->consumer_offset));
if (remaining_space < effective_length) {
// Clear the lock bit
InterlockedAnd(&record->size, ~(EBPF_RING_BUFFER_RECORD_FLAG_LOCKED));
return EBPF_NO_MEMORY;
}
// Grab the pointer to the record.
*data = record->data;
WriteRelease(&record->size, effective_length | EBPF_RING_BUFFER_RECORD_FLAG_LOCKED);
int64_t new_producer_offset = producer_offset + effective_length;
WriteRelease64(&ring->producer_offset, new_producer_offset);
break;
}
record->header.locked = 1;
MemoryBarrier();
*data = record->data;
result = EBPF_SUCCESS;
Done:
ebpf_lock_unlock(&ring->lock, state);
return result;
return EBPF_SUCCESS;
}
_Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_submit(_Frees_ptr_opt_ uint8_t* data)
{
if (!data) {
ebpf_ring_buffer_record_t* record = CONTAINING_RECORD(data, ebpf_ring_buffer_record_t, data);
long size = ReadAcquire(&record->size);
if (!(size & EBPF_RING_BUFFER_RECORD_FLAG_LOCKED)) {
return EBPF_INVALID_ARGUMENT;
}
ebpf_ring_buffer_record_t* record =
(ebpf_ring_buffer_record_t*)(data - EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
record->header.discarded = 0;
// Place a memory barrier here so that all prior writes to the record are completed before the record
// is unlocked. Caller needs to ensure a MemoryBarrier between reading the record->header.locked and
// the data in the record.
MemoryBarrier();
record->header.locked = 0;
size &= ~EBPF_RING_BUFFER_RECORD_FLAG_LOCKED;
WriteRelease(&record->size, size);
return EBPF_SUCCESS;
}
_Must_inspect_result_ ebpf_result_t
ebpf_ring_buffer_discard(_Frees_ptr_opt_ uint8_t* data)
{
if (!data) {
ebpf_ring_buffer_record_t* record = CONTAINING_RECORD(data, ebpf_ring_buffer_record_t, data);
long size = ReadAcquire(&record->size);
if (!(size & EBPF_RING_BUFFER_RECORD_FLAG_LOCKED)) {
return EBPF_INVALID_ARGUMENT;
}
ebpf_ring_buffer_record_t* record =
(ebpf_ring_buffer_record_t*)(data - EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
record->header.discarded = 1;
// Place a memory barrier here so that all prior writes to the record are completed before the record
// is unlocked. Caller needs to ensure a MemoryBarrier between reading the record->header.locked and
// the data in the record.
MemoryBarrier();
record->header.locked = 0;
size &= ~EBPF_RING_BUFFER_RECORD_FLAG_LOCKED;
size |= EBPF_RING_BUFFER_RECORD_FLAG_DISCARDED;
WriteRelease(&record->size, size);
return EBPF_SUCCESS;
}

Просмотреть файл

@ -1062,9 +1062,9 @@ TEST_CASE("ring_buffer_output", "[platform]")
auto record = ebpf_ring_buffer_next_record(buffer, size, consumer, producer);
REQUIRE(record != nullptr);
REQUIRE(record->header.length == data.size() + EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
REQUIRE(record->size == data.size() + EBPF_OFFSET_OF(ebpf_ring_buffer_record_t, data));
REQUIRE(ebpf_ring_buffer_return(ring_buffer, record->header.length) == EBPF_SUCCESS);
REQUIRE(ebpf_ring_buffer_return(ring_buffer, record->size) == EBPF_SUCCESS);
ebpf_ring_buffer_query(ring_buffer, &consumer, &producer);
record = ebpf_ring_buffer_next_record(buffer, size, consumer, producer);
@ -1134,7 +1134,7 @@ TEST_CASE("ring_buffer_reserve_submit_discard", "[platform]")
}
uint8_t* mem3 = nullptr;
REQUIRE(ebpf_ring_buffer_reserve(ring_buffer, &mem3, size + 1) == EBPF_INVALID_ARGUMENT);
REQUIRE(ebpf_ring_buffer_reserve(ring_buffer, &mem3, size + 1) == EBPF_NO_MEMORY);
ebpf_ring_buffer_query(ring_buffer, &consumer, &producer);
@ -1146,6 +1146,98 @@ TEST_CASE("ring_buffer_reserve_submit_discard", "[platform]")
ring_buffer = nullptr;
}
TEST_CASE("ring_buffer_stress", "[platform]")
{
_test_helper test_helper;
test_helper.initialize();
ebpf_ring_buffer_t* ring_buffer;
uint8_t* buffer;
std::vector<uint8_t> data(10);
size_t size = 64 * 1024;
bool bad_record = false;
std::atomic<size_t> a_records = 0;
std::atomic<size_t> b_records = 0;
std::atomic<bool> stop{false};
REQUIRE(ebpf_ring_buffer_create(&ring_buffer, size) == EBPF_SUCCESS);
REQUIRE(ebpf_ring_buffer_map_buffer(ring_buffer, &buffer) == EBPF_SUCCESS);
auto producer = [&](std::vector<uint8_t>& data) {
while (!stop) {
if (ebpf_ring_buffer_output(ring_buffer, data.data(), data.size()) != EBPF_SUCCESS) {
YieldProcessor();
}
}
};
auto consumer = [&]() {
size_t consumer;
size_t producer;
while (!stop) {
ebpf_ring_buffer_query(ring_buffer, &consumer, &producer);
if (consumer != producer) {
auto record = ebpf_ring_buffer_next_record(buffer, size, consumer, producer);
if (record != nullptr) {
volatile long actual_size = ReadAcquire(&record->size);
if (actual_size & EBPF_RING_BUFFER_RECORD_FLAG_LOCKED) {
YieldProcessor();
continue;
}
switch (actual_size) {
case 17:
a_records++;
break;
case 23:
b_records++;
break;
default:
bad_record = true;
return;
break;
}
if (ebpf_ring_buffer_return(ring_buffer, actual_size) != EBPF_SUCCESS) {
bad_record = true;
break;
}
}
} else {
YieldProcessor();
}
}
};
std::vector<std::thread> threads;
std::vector<uint8_t> data1(13, 'a');
std::vector<uint8_t> data2(19, 'b');
auto producer_a = [&]() { producer(data1); };
auto producer_b = [&]() { producer(data2); };
// Start consumer thread.
threads.emplace_back(std::thread(consumer));
// Start producer threads.
for (size_t i = 0; i < 10; i++) {
threads.emplace_back(std::thread(producer_a));
threads.emplace_back(std::thread(producer_b));
}
std::this_thread::sleep_for(std::chrono::seconds(1));
stop = true;
for (auto& thread : threads) {
thread.join();
}
REQUIRE(!bad_record);
REQUIRE(a_records > 0);
// REQUIRE(b_records > 0);
ebpf_ring_buffer_destroy(ring_buffer);
}
TEST_CASE("error codes", "[platform]")
{
for (ebpf_result_t result = EBPF_SUCCESS; result < EBPF_RESULT_COUNT; result = (ebpf_result_t)(result + 1)) {

Просмотреть файл

@ -5,14 +5,12 @@
CXPLAT_EXTERN_C_BEGIN
#define EBPF_RING_BUFFER_RECORD_FLAG_LOCKED (long)(0x1ul << 31)
#define EBPF_RING_BUFFER_RECORD_FLAG_DISCARDED (long)(0x1ul << 30)
typedef struct _ebpf_ring_buffer_record
{
struct
{
uint8_t locked : 1;
uint8_t discarded : 1;
uint32_t length : 30;
} header;
long size;
uint8_t data[1];
} ebpf_ring_buffer_record_t;