From 821620133d07c8ea561c63277b68f6ee5e21adf1 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 23 Mar 2022 16:08:53 +0000 Subject: [PATCH] Remove ChunkAllocator --- src/backend/backend.h | 9 +- src/backend/chunkallocator.h | 370 ------------------- src/backend/fixedglobalconfig.h | 1 - src/backend/globalconfig.h | 1 - src/ds/spmcstack.h | 72 ---- src/mem/corealloc.h | 21 +- src/mem/pool.h | 11 +- src/test/func/domestication/domestication.cc | 8 - 8 files changed, 18 insertions(+), 475 deletions(-) delete mode 100644 src/backend/chunkallocator.h delete mode 100644 src/ds/spmcstack.h diff --git a/src/backend/backend.h b/src/backend/backend.h index 3145a1c1..4c169d6a 100644 --- a/src/backend/backend.h +++ b/src/backend/backend.h @@ -1,7 +1,6 @@ #pragma once #include "../mem/allocconfig.h" #include "../pal/pal.h" -#include "chunkallocator.h" #include "commitrange.h" #include "commonconfig.h" #include "empty_range.h" @@ -301,10 +300,10 @@ namespace snmalloc return {p, meta}; } - static void dealloc_chunk( - LocalState& local_state, ChunkRecord* chunk_record, size_t size) + static void + dealloc_chunk(LocalState& local_state, MetaCommon& meta_common, size_t size) { - auto chunk = chunk_record->meta_common.chunk; + auto chunk = meta_common.chunk; /* * The backend takes possession of these chunks now, by disassociating @@ -316,7 +315,7 @@ namespace snmalloc Pagemap::set_metaentry(address_cast(chunk), size, t); local_state.get_meta_range()->dealloc_range( - capptr::Chunk(chunk_record), PAGEMAP_METADATA_STRUCT_SIZE); + capptr::Chunk(&meta_common), PAGEMAP_METADATA_STRUCT_SIZE); local_state.object_range->dealloc_range(chunk, size); } diff --git a/src/backend/chunkallocator.h b/src/backend/chunkallocator.h deleted file mode 100644 index 80d1be5d..00000000 --- a/src/backend/chunkallocator.h +++ /dev/null @@ -1,370 +0,0 @@ -#pragma once - -/*** - * WARNING: This file is not currently in use. The functionality has not - * be transistioned to the new backend. It does not seem to be required - * but further testing is required before we delete it. - */ - -#include "../backend/backend_concept.h" -#include "../backend/metatypes.h" -#include "../ds/mpmcstack.h" -#include "../ds/spmcstack.h" -#include "../mem/metaslab.h" -#include "../mem/sizeclasstable.h" -#include "../pal/pal_ds.h" - -#ifdef SNMALLOC_TRACING -# include -#endif - -#include - -namespace snmalloc -{ - /** - * Used to store slabs in the unused sizes. - */ - struct ChunkRecord - { - MetaCommon meta_common; - std::atomic next; - }; -#if defined(USE_METADATA_CONCEPT) - static_assert(ConceptMetadataStruct); -#endif - - /** - * How many slab sizes that can be provided. - */ - constexpr size_t NUM_SLAB_SIZES = Pal::address_bits - MIN_CHUNK_BITS; - - /** - * Number of free stacks per chunk size that each allocator will use. - * For performance ideally a power of 2. We will return to the central - * pool anything that has not be used in the last NUM_EPOCHS - 1, where - * each epoch is separated by DecayMemoryTimerObject::PERIOD. - * I.e. if period is 500ms and num of epochs is 4, then we will return to - * the central pool anything not used for the last 1500-2000ms. - */ - constexpr size_t NUM_EPOCHS = 4; - static_assert(bits::is_pow2(NUM_EPOCHS), "Code assumes power of two."); - - class ChunkAllocatorLocalState - { - friend class ChunkAllocator; - - /** - * Stack of slabs that have been returned for reuse. - */ - ModArray>> - chunk_stack; - - /** - * Used for list of all ChunkAllocatorLocalStates. 
- */ - std::atomic next{nullptr}; - }; - - /** - * This is the global state required for the chunk allocator. - * It must be provided as a part of the shared state handle - * to the chunk allocator. - */ - class ChunkAllocatorState - { - friend class ChunkAllocator; - /** - * Stack of slabs that have been returned for reuse. - */ - ModArray> - decommitted_chunk_stack; - - /** - * Which is the current epoch to place dealloced chunks, and the - * first place we look for allocating chunks. - */ - alignas(CACHELINE_SIZE) std::atomic epoch{0}; - - /** - * All memory issued by this address space manager - */ - std::atomic peak_memory_usage_{0}; - - std::atomic memory_in_stacks{0}; - - std::atomic all_local{nullptr}; - - // Flag to ensure one-shot registration with the PAL for notifications. - std::atomic_flag register_decay{}; - - public: - size_t unused_memory() - { - return memory_in_stacks; - } - - size_t peak_memory_usage() - { - return peak_memory_usage_; - } - - void add_peak_memory_usage(size_t size) - { - peak_memory_usage_ += size; -#ifdef SNMALLOC_TRACING - std::cout << "peak_memory_usage_: " << peak_memory_usage_ << std::endl; -#endif - } - }; - - class ChunkAllocator - { - template - class DecayMemoryTimerObject : public PalTimerObject - { - ChunkAllocatorState* state; - - /*** - * Method for callback object to perform lazy decommit. - */ - static void process(PalTimerObject* p) - { - // Unsafe downcast here. Don't want vtable and RTTI. - auto self = reinterpret_cast(p); - ChunkAllocator::handle_decay_tick(self->state); - } - - // Specify that we notify the ChunkAllocator every 500ms. - static constexpr size_t PERIOD = 500; - - public: - DecayMemoryTimerObject(ChunkAllocatorState* state) - : PalTimerObject(&process, PERIOD), state(state) - {} - }; - - template - static void handle_decay_tick(ChunkAllocatorState* state) - { - auto new_epoch = (state->epoch + 1) % NUM_EPOCHS; - // Flush old index for all threads. - ChunkAllocatorLocalState* curr = state->all_local; - while (curr != nullptr) - { - for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++) - { - auto& old_stack = curr->chunk_stack[sc][new_epoch]; - ChunkRecord* record = old_stack.pop_all(); - while (record != nullptr) - { - auto next = record->next.load(); - - // Disable pages for this - Pal::notify_not_using( - record->meta_common.chunk.unsafe_ptr(), - slab_sizeclass_to_size(sc)); - - // Add to global state - state->decommitted_chunk_stack[sc].push(record); - - record = next; - } - } - curr = curr->next; - } - - // Advance current index - state->epoch = new_epoch; - } - - public: - template - static std::pair, Metaslab*> alloc_chunk( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state, - chunksizeclass_t slab_sizeclass, - size_t slab_size, - uintptr_t ras) - { - using PAL = typename SharedStateHandle::Pal; - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - if (slab_sizeclass >= NUM_SLAB_SIZES) - { - // Your address space is not big enough for this allocation! - errno = ENOMEM; - return {nullptr, nullptr}; - } - - ChunkRecord* chunk_record = nullptr; - if constexpr (pal_supports) - { - // Try local cache of chunks first - for (size_t e = 0; e < NUM_EPOCHS && chunk_record == nullptr; e++) - { - chunk_record = - chunk_alloc_local_state - .chunk_stack[slab_sizeclass][(state.epoch - e) % NUM_EPOCHS] - .pop(); - } - } - - // Try global cache. 
- if (chunk_record == nullptr) - { - chunk_record = state.decommitted_chunk_stack[slab_sizeclass].pop(); - if (chunk_record != nullptr) - { - PAL::template notify_using( - chunk_record->meta_common.chunk.unsafe_ptr(), slab_size); - } - } - - if (chunk_record != nullptr) - { - auto slab = chunk_record->meta_common.chunk; - state.memory_in_stacks -= slab_size; - auto meta = reinterpret_cast(chunk_record); -#ifdef SNMALLOC_TRACING - std::cout << "Reuse slab:" << slab.unsafe_ptr() << " slab_sizeclass " - << slab_sizeclass << " size " << slab_size - << " memory in stacks " << state.memory_in_stacks - << std::endl; -#endif - MetaEntry entry{&meta->meta_common, ras}; - SharedStateHandle::Pagemap::set_metaentry( - address_cast(slab), slab_size, entry); - return {slab, meta}; - } - - // Allocate a fresh slab as there are no available ones. - // First create meta-data - auto [slab, meta] = - SharedStateHandle::alloc_chunk(&local_state, slab_size, ras); -#ifdef SNMALLOC_TRACING - std::cout << "Create slab:" << slab.unsafe_ptr() << " slab_sizeclass " - << slab_sizeclass << " size " << slab_size << std::endl; -#endif - - state.add_peak_memory_usage(slab_size); - state.add_peak_memory_usage(PAGEMAP_METADATA_STRUCT_SIZE); - // TODO handle bounded versus lazy pagemaps in stats - state.add_peak_memory_usage( - (slab_size / MIN_CHUNK_SIZE) * sizeof(MetaEntry)); - - return {slab, meta}; - } - - template - SNMALLOC_SLOW_PATH static void dealloc( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state, - ChunkRecord* p, - size_t slab_sizeclass) - { - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - if constexpr (pal_supports) - { - // If we have a time source use decay based local cache. -#ifdef SNMALLOC_TRACING - std::cout << "Return slab:" << p->meta_common.chunk.unsafe_ptr() - << " slab_sizeclass " << slab_sizeclass << " size " - << slab_sizeclass_to_size(slab_sizeclass) - << " memory in stacks " << state.memory_in_stacks - << std::endl; -#endif - chunk_alloc_local_state.chunk_stack[slab_sizeclass][state.epoch].push( - p); - } - else - { - // No time source share immediately with global state. - // Disable pages for this chunk. - SharedStateHandle::Pal::notify_not_using( - p->meta_common.chunk.unsafe_ptr(), - slab_sizeclass_to_size(slab_sizeclass)); - - // Add to global state - state.decommitted_chunk_stack[slab_sizeclass].push(p); - } - - state.memory_in_stacks += slab_sizeclass_to_size(slab_sizeclass); - } - - /** - * Provide a block of meta-data with size and align. - * - * Backend allocator may use guard pages and separate area of - * address space to protect this from corruption. - */ - template< - typename U, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle, - typename... Args> - static U* alloc_meta_data( - typename SharedStateHandle::LocalState* local_state, Args&&... args) - { - // Cache line align - size_t size = bits::align_up(sizeof(U), 64); - - capptr::Chunk p = - SharedStateHandle::template alloc_meta_data(local_state, size); - - if (p == nullptr) - { - errno = ENOMEM; - return nullptr; - } - - return new (p.unsafe_ptr()) U(std::forward(args)...); - } - - template - static void register_local_state( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state) - { - if constexpr (pal_supports) - { - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - // Register with the Pal to receive notifications. 
- if (!state.register_decay.test_and_set()) - { - auto timer = alloc_meta_data< - DecayMemoryTimerObject, - SharedStateHandle>(&local_state, &state); - if (timer != nullptr) - { - SharedStateHandle::Pal::register_timer(timer); - } - else - { - // We failed to register the notification. - // This is not catarophic, but if we can't allocate this - // state something else will fail shortly. - state.register_decay.clear(); - } - } - - // Add to the list of local states. - auto* head = state.all_local.load(); - do - { - chunk_alloc_local_state.next = head; - } while (!state.all_local.compare_exchange_strong( - head, &chunk_alloc_local_state)); - } - else - { - UNUSED(local_state); - UNUSED(chunk_alloc_local_state); - } - } - }; -} // namespace snmalloc diff --git a/src/backend/fixedglobalconfig.h b/src/backend/fixedglobalconfig.h index 9bb33ae5..5b9cbf5d 100644 --- a/src/backend/fixedglobalconfig.h +++ b/src/backend/fixedglobalconfig.h @@ -1,7 +1,6 @@ #pragma once #include "../backend/backend.h" -#include "../backend/chunkallocator.h" #include "../mem/corealloc.h" #include "../mem/pool.h" #include "commonconfig.h" diff --git a/src/backend/globalconfig.h b/src/backend/globalconfig.h index 3b468569..a496aa3e 100644 --- a/src/backend/globalconfig.h +++ b/src/backend/globalconfig.h @@ -1,7 +1,6 @@ #pragma once #include "../backend/backend.h" -#include "../backend/chunkallocator.h" #include "../mem/corealloc.h" #include "../mem/pool.h" #include "commonconfig.h" diff --git a/src/ds/spmcstack.h b/src/ds/spmcstack.h deleted file mode 100644 index 7c6ea70e..00000000 --- a/src/ds/spmcstack.h +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include "aba.h" -#include "ptrwrap.h" - -namespace snmalloc -{ - /** - * Concurrent Stack - * - * This stack supports the following clients - * (push|pop)* || pop_all* || ... || pop_all* - * - * That is a single thread that can do push and pop, and other threads - * that do pop_all. pop_all if it returns a value, returns all of the - * stack, however, it may return nullptr if it races with either a push - * or a pop. - * - * The primary use case is single-threaded access, where other threads - * can attempt to steal all the values. - */ - template - class SPMCStack - { - private: - alignas(CACHELINE_SIZE) std::atomic stack{}; - - public: - constexpr SPMCStack() = default; - - void push(T* item) - { - static_assert( - std::is_same>::value, - "T->next must be an std::atomic"); - - return push(item, item); - } - - void push(T* first, T* last) - { - T* old_head = stack.exchange(nullptr, std::memory_order_relaxed); - last->next.store(old_head, std::memory_order_relaxed); - // Assume stays null as not allowed to race with pop or other pushes. - SNMALLOC_ASSERT(stack.load() == nullptr); - stack.store(first, std::memory_order_release); - } - - T* pop() - { - if (stack.load(std::memory_order_relaxed) == nullptr) - return nullptr; - T* old_head = stack.exchange(nullptr); - if (SNMALLOC_UNLIKELY(old_head == nullptr)) - return nullptr; - - auto next = old_head->next.load(std::memory_order_relaxed); - - // Assume stays null as not allowed to race with pop or other pushes. 
- SNMALLOC_ASSERT(stack.load() == nullptr); - - stack.store(next, std::memory_order_release); - - return old_head; - } - - T* pop_all() - { - return stack.exchange(nullptr); - } - }; -} // namespace snmalloc diff --git a/src/mem/corealloc.h b/src/mem/corealloc.h index b22fbb0a..bd42b4e3 100644 --- a/src/mem/corealloc.h +++ b/src/mem/corealloc.h @@ -1,6 +1,5 @@ #pragma once -#include "../backend/chunkallocator.h" #include "../ds/defines.h" #include "allocconfig.h" #include "localcache.h" @@ -282,7 +281,7 @@ namespace snmalloc bumpptr = slab_end; } - ChunkRecord* clear_slab(Metaslab* meta, smallsizeclass_t sizeclass) + void clear_slab(Metaslab* meta, smallsizeclass_t sizeclass) { auto& key = entropy.get_free_list_key(); freelist::Iter<> fl; @@ -324,15 +323,13 @@ namespace snmalloc SNMALLOC_ASSERT( count == snmalloc::sizeclass_to_slab_object_count(sizeclass)); #endif - ChunkRecord* chunk_record = reinterpret_cast(meta); // TODO: This is a capability amplification as we are saying we // have the whole chunk. auto start_of_slab = pointer_align_down( p, snmalloc::sizeclass_to_slab_size(sizeclass)); SNMALLOC_ASSERT( - address_cast(start_of_slab) == - chunk_record->meta_common.chunk_address()); + address_cast(start_of_slab) == meta->meta_common.chunk_address()); #if defined(__CHERI_PURE_CAPABILITY__) && !defined(SNMALLOC_CHECK_CLIENT) // Zero the whole slab. For CHERI we at least need to clear the freelist @@ -340,9 +337,8 @@ namespace snmalloc // the freelist order as for SNMALLOC_CHECK_CLIENT. Zeroing the whole slab // may be more friendly to hw because it does not involve pointer chasing // and is amenable to prefetching. - chunk_record->meta_common - .template zero_chunk( - snmalloc::sizeclass_to_slab_size(sizeclass)); + meta->meta_common.template zero_chunk( + snmalloc::sizeclass_to_slab_size(sizeclass)); #endif #ifdef SNMALLOC_TRACING @@ -351,7 +347,6 @@ namespace snmalloc #else UNUSED(start_of_slab); #endif - return chunk_record; } template @@ -386,11 +381,11 @@ namespace snmalloc // TODO delay the clear to the next user of the slab, or teardown so // don't touch the cache lines at this point in snmalloc_check_client. 
- auto chunk_record = clear_slab(meta, sizeclass); + clear_slab(meta, sizeclass); SharedStateHandle::dealloc_chunk( get_backend_local_state(), - chunk_record, + meta->meta_common, sizeclass_to_slab_size(sizeclass)); return true; @@ -422,10 +417,8 @@ namespace snmalloc UNUSED(size); #endif - auto slab_record = reinterpret_cast(meta); - SharedStateHandle::dealloc_chunk( - get_backend_local_state(), slab_record, size); + get_backend_local_state(), meta->meta_common, size); return; } diff --git a/src/mem/pool.h b/src/mem/pool.h index c7924c63..141a14c5 100644 --- a/src/mem/pool.h +++ b/src/mem/pool.h @@ -1,11 +1,12 @@ #pragma once -#include "../backend/chunkallocator.h" #include "../ds/flaglock.h" #include "../ds/mpmcstack.h" #include "../pal/pal_concept.h" #include "pooled.h" +#include + namespace snmalloc { /** @@ -132,15 +133,17 @@ namespace snmalloc return p; } - p = ChunkAllocator::alloc_meta_data( - nullptr, std::forward(args)...); + auto raw = + SharedStateHandle::template alloc_meta_data(nullptr, sizeof(T)); - if (p == nullptr) + if (raw == nullptr) { SharedStateHandle::Pal::error( "Failed to initialise thread local allocator."); } + p = new (raw.unsafe_ptr()) T(std::forward(args)...); + FlagLock f(pool.lock); p->list_next = pool.list; pool.list = p; diff --git a/src/test/func/domestication/domestication.cc b/src/test/func/domestication/domestication.cc index 7a5d2c07..5108960f 100644 --- a/src/test/func/domestication/domestication.cc +++ b/src/test/func/domestication/domestication.cc @@ -24,8 +24,6 @@ namespace snmalloc private: using Backend = BackendAllocator; - SNMALLOC_REQUIRE_CONSTINIT - inline static ChunkAllocatorState chunk_allocator_state; SNMALLOC_REQUIRE_CONSTINIT inline static GlobalPoolState alloc_pool; @@ -46,12 +44,6 @@ namespace snmalloc } (); - static ChunkAllocatorState& - get_chunk_allocator_state(Backend::LocalState* = nullptr) - { - return chunk_allocator_state; - } - static GlobalPoolState& pool() { return alloc_pool;
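
The core of this change is the chunk-deallocation interface: the backend's dealloc_chunk no longer takes a ChunkRecord* obtained by reinterpret_cast from the slab's Metaslab; it takes the MetaCommon that is already embedded in that metadata (see the backend.h and corealloc.h hunks above). The standalone sketch below illustrates only that signature/ownership change; MetaCommon, Metaslab and dealloc_chunk here are simplified stand-ins, not snmalloc's real definitions, and the LocalState parameter is omitted to keep the sketch self-contained.

#include <cstddef>
#include <iostream>

// Simplified stand-ins for the snmalloc types involved in this patch.
struct MetaCommon
{
  void* chunk = nullptr; // address of the chunk this metadata describes
};

struct Metaslab
{
  MetaCommon meta_common; // embedded, as in the real Metaslab
};

// New shape, mirroring backend.h after this patch:
// dealloc_chunk(LocalState&, MetaCommon&, size_t), minus LocalState.
void dealloc_chunk(MetaCommon& meta_common, size_t size)
{
  std::cout << "backend reclaims chunk " << meta_common.chunk << " (" << size
            << " bytes) along with its metadata\n";
}

int main()
{
  alignas(64) static char chunk[16384]; // pretend slab-sized chunk
  Metaslab meta;
  meta.meta_common.chunk = chunk;

  // Caller side, mirroring corealloc.h: no ChunkRecord cast, just pass the
  // embedded MetaCommon by reference.
  dealloc_chunk(meta.meta_common, sizeof(chunk));
}

Dropping the wrapper also removes the reinterpret_cast from Metaslab to ChunkRecord that clear_slab and the large-deallocation path previously relied on, which is why clear_slab can now return void.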