Matthew Parkinson 2022-03-23 16:08:53 +00:00 committed by Matthew Parkinson
Parent 73be8a3786
Commit 821620133d
8 changed files with 18 additions and 475 deletions

View File

@@ -1,7 +1,6 @@
#pragma once
#include "../mem/allocconfig.h"
#include "../pal/pal.h"
#include "chunkallocator.h"
#include "commitrange.h"
#include "commonconfig.h"
#include "empty_range.h"
@@ -301,10 +300,10 @@ namespace snmalloc
return {p, meta};
}
static void dealloc_chunk(
LocalState& local_state, ChunkRecord* chunk_record, size_t size)
static void
dealloc_chunk(LocalState& local_state, MetaCommon& meta_common, size_t size)
{
auto chunk = chunk_record->meta_common.chunk;
auto chunk = meta_common.chunk;
/*
* The backend takes possession of these chunks now, by disassociating
@@ -316,7 +315,7 @@ namespace snmalloc
Pagemap::set_metaentry(address_cast(chunk), size, t);
local_state.get_meta_range()->dealloc_range(
capptr::Chunk<void>(chunk_record), PAGEMAP_METADATA_STRUCT_SIZE);
capptr::Chunk<void>(&meta_common), PAGEMAP_METADATA_STRUCT_SIZE);
local_state.object_range->dealloc_range(chunk, size);
}

View File

@@ -1,370 +0,0 @@
#pragma once
/***
* WARNING: This file is not currently in use. The functionality has not
* been transitioned to the new backend. It does not seem to be required,
* but further testing is needed before we delete it.
*/
#include "../backend/backend_concept.h"
#include "../backend/metatypes.h"
#include "../ds/mpmcstack.h"
#include "../ds/spmcstack.h"
#include "../mem/metaslab.h"
#include "../mem/sizeclasstable.h"
#include "../pal/pal_ds.h"
#ifdef SNMALLOC_TRACING
# include <iostream>
#endif
#include <new>
namespace snmalloc
{
/**
* Used to store slabs that are currently unused.
*/
struct ChunkRecord
{
MetaCommon meta_common;
std::atomic<ChunkRecord*> next;
};
#if defined(USE_METADATA_CONCEPT)
static_assert(ConceptMetadataStruct<ChunkRecord>);
#endif
/**
* How many slab sizes can be provided.
*/
constexpr size_t NUM_SLAB_SIZES = Pal::address_bits - MIN_CHUNK_BITS;
/**
* Number of free stacks per chunk size that each allocator will use.
* For performance this should ideally be a power of 2. We will return to
* the central pool anything that has not been used in the last
* NUM_EPOCHS - 1 epochs, where each epoch is separated by
* DecayMemoryTimerObject::PERIOD.
* I.e. if the period is 500ms and the number of epochs is 4, then we will
* return to the central pool anything not used for the last 1500-2000ms.
*/
constexpr size_t NUM_EPOCHS = 4;
static_assert(bits::is_pow2(NUM_EPOCHS), "Code assumes power of two.");
class ChunkAllocatorLocalState
{
friend class ChunkAllocator;
/**
* Stack of slabs that have been returned for reuse.
*/
ModArray<NUM_SLAB_SIZES, ModArray<NUM_EPOCHS, SPMCStack<ChunkRecord>>>
chunk_stack;
/**
* Used for list of all ChunkAllocatorLocalStates.
*/
std::atomic<ChunkAllocatorLocalState*> next{nullptr};
};
/**
* This is the global state required for the chunk allocator.
* It must be provided as a part of the shared state handle
* to the chunk allocator.
*/
class ChunkAllocatorState
{
friend class ChunkAllocator;
/**
* Stacks of decommitted slabs that have been returned for reuse.
*/
ModArray<NUM_SLAB_SIZES, MPMCStack<ChunkRecord, RequiresInit>>
decommitted_chunk_stack;
/**
* The current epoch in which to place deallocated chunks; it is also
* the first place we look when allocating chunks.
*/
alignas(CACHELINE_SIZE) std::atomic<size_t> epoch{0};
/**
* All memory issued by this chunk allocator.
*/
std::atomic<size_t> peak_memory_usage_{0};
std::atomic<size_t> memory_in_stacks{0};
std::atomic<ChunkAllocatorLocalState*> all_local{nullptr};
// Flag to ensure one-shot registration with the PAL for notifications.
std::atomic_flag register_decay{};
public:
size_t unused_memory()
{
return memory_in_stacks;
}
size_t peak_memory_usage()
{
return peak_memory_usage_;
}
void add_peak_memory_usage(size_t size)
{
peak_memory_usage_ += size;
#ifdef SNMALLOC_TRACING
std::cout << "peak_memory_usage_: " << peak_memory_usage_ << std::endl;
#endif
}
};
class ChunkAllocator
{
template<SNMALLOC_CONCEPT(ConceptPAL) Pal>
class DecayMemoryTimerObject : public PalTimerObject
{
ChunkAllocatorState* state;
/***
* Method for callback object to perform lazy decommit.
*/
static void process(PalTimerObject* p)
{
// Unsafe downcast here. Don't want vtable and RTTI.
auto self = reinterpret_cast<DecayMemoryTimerObject*>(p);
ChunkAllocator::handle_decay_tick<Pal>(self->state);
}
// Specify that we notify the ChunkAllocator every 500ms.
static constexpr size_t PERIOD = 500;
public:
DecayMemoryTimerObject(ChunkAllocatorState* state)
: PalTimerObject(&process, PERIOD), state(state)
{}
};
template<SNMALLOC_CONCEPT(ConceptPAL) Pal>
static void handle_decay_tick(ChunkAllocatorState* state)
{
auto new_epoch = (state->epoch + 1) % NUM_EPOCHS;
// Flush old index for all threads.
ChunkAllocatorLocalState* curr = state->all_local;
while (curr != nullptr)
{
for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++)
{
auto& old_stack = curr->chunk_stack[sc][new_epoch];
ChunkRecord* record = old_stack.pop_all();
while (record != nullptr)
{
auto next = record->next.load();
// Disable pages for this chunk.
Pal::notify_not_using(
record->meta_common.chunk.unsafe_ptr(),
slab_sizeclass_to_size(sc));
// Add to global state
state->decommitted_chunk_stack[sc].push(record);
record = next;
}
}
curr = curr->next;
}
// Advance current index
state->epoch = new_epoch;
}
public:
template<SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle>
static std::pair<capptr::Chunk<void>, Metaslab*> alloc_chunk(
typename SharedStateHandle::LocalState& local_state,
ChunkAllocatorLocalState& chunk_alloc_local_state,
chunksizeclass_t slab_sizeclass,
size_t slab_size,
uintptr_t ras)
{
using PAL = typename SharedStateHandle::Pal;
ChunkAllocatorState& state =
SharedStateHandle::get_chunk_allocator_state(&local_state);
if (slab_sizeclass >= NUM_SLAB_SIZES)
{
// Your address space is not big enough for this allocation!
errno = ENOMEM;
return {nullptr, nullptr};
}
ChunkRecord* chunk_record = nullptr;
if constexpr (pal_supports<Time, PAL>)
{
// Try local cache of chunks first
for (size_t e = 0; e < NUM_EPOCHS && chunk_record == nullptr; e++)
{
chunk_record =
chunk_alloc_local_state
.chunk_stack[slab_sizeclass][(state.epoch - e) % NUM_EPOCHS]
.pop();
}
}
// Try global cache.
if (chunk_record == nullptr)
{
chunk_record = state.decommitted_chunk_stack[slab_sizeclass].pop();
if (chunk_record != nullptr)
{
PAL::template notify_using<NoZero>(
chunk_record->meta_common.chunk.unsafe_ptr(), slab_size);
}
}
if (chunk_record != nullptr)
{
auto slab = chunk_record->meta_common.chunk;
state.memory_in_stacks -= slab_size;
auto meta = reinterpret_cast<Metaslab*>(chunk_record);
#ifdef SNMALLOC_TRACING
std::cout << "Reuse slab:" << slab.unsafe_ptr() << " slab_sizeclass "
<< slab_sizeclass << " size " << slab_size
<< " memory in stacks " << state.memory_in_stacks
<< std::endl;
#endif
MetaEntry entry{&meta->meta_common, ras};
SharedStateHandle::Pagemap::set_metaentry(
address_cast(slab), slab_size, entry);
return {slab, meta};
}
// Allocate a fresh slab as there are no available ones.
// First create meta-data
auto [slab, meta] =
SharedStateHandle::alloc_chunk(&local_state, slab_size, ras);
#ifdef SNMALLOC_TRACING
std::cout << "Create slab:" << slab.unsafe_ptr() << " slab_sizeclass "
<< slab_sizeclass << " size " << slab_size << std::endl;
#endif
state.add_peak_memory_usage(slab_size);
state.add_peak_memory_usage(PAGEMAP_METADATA_STRUCT_SIZE);
// TODO handle bounded versus lazy pagemaps in stats
state.add_peak_memory_usage(
(slab_size / MIN_CHUNK_SIZE) * sizeof(MetaEntry));
return {slab, meta};
}
template<SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle>
SNMALLOC_SLOW_PATH static void dealloc(
typename SharedStateHandle::LocalState& local_state,
ChunkAllocatorLocalState& chunk_alloc_local_state,
ChunkRecord* p,
size_t slab_sizeclass)
{
ChunkAllocatorState& state =
SharedStateHandle::get_chunk_allocator_state(&local_state);
if constexpr (pal_supports<Time, typename SharedStateHandle::Pal>)
{
// If we have a time source use decay based local cache.
#ifdef SNMALLOC_TRACING
std::cout << "Return slab:" << p->meta_common.chunk.unsafe_ptr()
<< " slab_sizeclass " << slab_sizeclass << " size "
<< slab_sizeclass_to_size(slab_sizeclass)
<< " memory in stacks " << state.memory_in_stacks
<< std::endl;
#endif
chunk_alloc_local_state.chunk_stack[slab_sizeclass][state.epoch].push(
p);
}
else
{
// No time source; share immediately with the global state.
// Disable pages for this chunk.
SharedStateHandle::Pal::notify_not_using(
p->meta_common.chunk.unsafe_ptr(),
slab_sizeclass_to_size(slab_sizeclass));
// Add to global state
state.decommitted_chunk_stack[slab_sizeclass].push(p);
}
state.memory_in_stacks += slab_sizeclass_to_size(slab_sizeclass);
}
/**
* Provide a block of meta-data with the requested size and alignment.
*
* The backend allocator may use guard pages and a separate area of
* address space to protect this from corruption.
*/
template<
typename U,
SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle,
typename... Args>
static U* alloc_meta_data(
typename SharedStateHandle::LocalState* local_state, Args&&... args)
{
// Cache line align
size_t size = bits::align_up(sizeof(U), 64);
capptr::Chunk<void> p =
SharedStateHandle::template alloc_meta_data<U>(local_state, size);
if (p == nullptr)
{
errno = ENOMEM;
return nullptr;
}
return new (p.unsafe_ptr()) U(std::forward<Args>(args)...);
}
template<SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle>
static void register_local_state(
typename SharedStateHandle::LocalState& local_state,
ChunkAllocatorLocalState& chunk_alloc_local_state)
{
if constexpr (pal_supports<Time, typename SharedStateHandle::Pal>)
{
ChunkAllocatorState& state =
SharedStateHandle::get_chunk_allocator_state(&local_state);
// Register with the Pal to receive notifications.
if (!state.register_decay.test_and_set())
{
auto timer = alloc_meta_data<
DecayMemoryTimerObject<typename SharedStateHandle::Pal>,
SharedStateHandle>(&local_state, &state);
if (timer != nullptr)
{
SharedStateHandle::Pal::register_timer(timer);
}
else
{
// We failed to register the notification.
// This is not catastrophic, but if we can't allocate this
// state, something else will fail shortly.
state.register_decay.clear();
}
}
// Add to the list of local states.
auto* head = state.all_local.load();
do
{
chunk_alloc_local_state.next = head;
} while (!state.all_local.compare_exchange_strong(
head, &chunk_alloc_local_state));
}
else
{
UNUSED(local_state);
UNUSED(chunk_alloc_local_state);
}
}
};
} // namespace snmalloc
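For orientation, the epoch-decay scheme that the removed ChunkAllocator implemented (the NUM_EPOCHS/PERIOD comment and handle_decay_tick above) can be illustrated with a small standalone sketch. This is a simplified, single-threaded model only: EpochCache, GlobalPool and Chunk are hypothetical names, plain std::vector buckets stand in for snmalloc's SPMC/MPMC stacks, and decommit via the PAL is omitted.

#include <array>
#include <cstddef>
#include <vector>

struct Chunk
{
  void* base;
  std::size_t size;
};

struct GlobalPool
{
  // Stand-in for ChunkAllocatorState::decommitted_chunk_stack.
  std::vector<Chunk> decommitted;
  void push(Chunk c)
  {
    decommitted.push_back(c);
  }
};

template<std::size_t NUM_EPOCHS = 4>
class EpochCache
{
  std::array<std::vector<Chunk>, NUM_EPOCHS> buckets{};
  std::size_t epoch = 0;

public:
  // Deallocated chunks go into the bucket for the current epoch.
  void dealloc(Chunk c)
  {
    buckets[epoch].push_back(c);
  }

  // Allocation prefers recently used chunks: search from the current
  // epoch backwards through the older buckets.
  bool try_alloc(Chunk& out)
  {
    for (std::size_t e = 0; e < NUM_EPOCHS; e++)
    {
      auto& b = buckets[(epoch + NUM_EPOCHS - e) % NUM_EPOCHS];
      if (!b.empty())
      {
        out = b.back();
        b.pop_back();
        return true;
      }
    }
    return false;
  }

  // Called once per PERIOD: the bucket about to become current holds
  // chunks untouched for at least NUM_EPOCHS - 1 periods; return them
  // to the global pool (the real code also decommits them via the PAL).
  void tick(GlobalPool& global)
  {
    std::size_t next = (epoch + 1) % NUM_EPOCHS;
    for (auto& c : buckets[next])
      global.push(c);
    buckets[next].clear();
    epoch = next;
  }
};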

View File

@@ -1,7 +1,6 @@
#pragma once
#include "../backend/backend.h"
#include "../backend/chunkallocator.h"
#include "../mem/corealloc.h"
#include "../mem/pool.h"
#include "commonconfig.h"

View File

@@ -1,7 +1,6 @@
#pragma once
#include "../backend/backend.h"
#include "../backend/chunkallocator.h"
#include "../mem/corealloc.h"
#include "../mem/pool.h"
#include "commonconfig.h"

View File

@@ -1,72 +0,0 @@
#pragma once
#include "aba.h"
#include "ptrwrap.h"
namespace snmalloc
{
/**
* Concurrent Stack
*
* This stack supports the following clients
* (push|pop)* || pop_all* || ... || pop_all*
*
* That is, a single thread may push and pop, while other threads may
* call pop_all. If pop_all returns a value, it returns the whole
* stack; however, it may return nullptr if it races with either a push
* or a pop.
*
* The primary use case is single-threaded access, where other threads
* can attempt to steal all the values.
*/
template<class T>
class SPMCStack
{
private:
alignas(CACHELINE_SIZE) std::atomic<T*> stack{};
public:
constexpr SPMCStack() = default;
void push(T* item)
{
static_assert(
std::is_same<decltype(T::next), std::atomic<T*>>::value,
"T->next must be an std::atomic<T*>");
return push(item, item);
}
void push(T* first, T* last)
{
T* old_head = stack.exchange(nullptr, std::memory_order_relaxed);
last->next.store(old_head, std::memory_order_relaxed);
// Assumed to stay null, as this is not allowed to race with pop or other pushes.
SNMALLOC_ASSERT(stack.load() == nullptr);
stack.store(first, std::memory_order_release);
}
T* pop()
{
if (stack.load(std::memory_order_relaxed) == nullptr)
return nullptr;
T* old_head = stack.exchange(nullptr);
if (SNMALLOC_UNLIKELY(old_head == nullptr))
return nullptr;
auto next = old_head->next.load(std::memory_order_relaxed);
// Assumed to stay null, as this is not allowed to race with pop or other pushes.
SNMALLOC_ASSERT(stack.load() == nullptr);
stack.store(next, std::memory_order_release);
return old_head;
}
T* pop_all()
{
return stack.exchange(nullptr);
}
};
} // namespace snmalloc
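To make the (push|pop)* || pop_all* protocol described above concrete, here is a usage sketch. It assumes the SPMCStack template from this removed file together with its snmalloc dependencies (CACHELINE_SIZE, SNMALLOC_ASSERT, SNMALLOC_UNLIKELY); the Node type and function name are hypothetical. One owner thread both pushes and pops, while a stealing thread only calls pop_all and must tolerate a nullptr result when it races with the owner.

#include <atomic>
#include <thread>

struct Node
{
  // SPMCStack<T> requires T::next to be a std::atomic<T*>.
  std::atomic<Node*> next{nullptr};
  int value{0};
};

void spmc_stack_example()
{
  snmalloc::SPMCStack<Node> stack;
  Node nodes[8];

  // Owner thread: the only thread allowed to push and pop.
  std::thread owner([&] {
    for (auto& n : nodes)
      stack.push(&n);
    while (stack.pop() != nullptr)
    {
      // Reuse the node; it may be pushed back later.
    }
  });

  // Stealing thread: only calls pop_all, and must tolerate nullptr when
  // it races with a push or pop on the owner thread.
  std::thread thief([&] {
    Node* stolen = stack.pop_all();
    while (stolen != nullptr)
    {
      Node* next = stolen->next.load(std::memory_order_relaxed);
      // ... return the stolen node to some shared pool ...
      stolen = next;
    }
  });

  owner.join();
  thief.join();
}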

View File

@@ -1,6 +1,5 @@
#pragma once
#include "../backend/chunkallocator.h"
#include "../ds/defines.h"
#include "allocconfig.h"
#include "localcache.h"
@@ -282,7 +281,7 @@ namespace snmalloc
bumpptr = slab_end;
}
ChunkRecord* clear_slab(Metaslab* meta, smallsizeclass_t sizeclass)
void clear_slab(Metaslab* meta, smallsizeclass_t sizeclass)
{
auto& key = entropy.get_free_list_key();
freelist::Iter<> fl;
@@ -324,15 +323,13 @@ namespace snmalloc
SNMALLOC_ASSERT(
count == snmalloc::sizeclass_to_slab_object_count(sizeclass));
#endif
ChunkRecord* chunk_record = reinterpret_cast<ChunkRecord*>(meta);
// TODO: This is a capability amplification as we are saying we
// have the whole chunk.
auto start_of_slab = pointer_align_down<void>(
p, snmalloc::sizeclass_to_slab_size(sizeclass));
SNMALLOC_ASSERT(
address_cast(start_of_slab) ==
chunk_record->meta_common.chunk_address());
address_cast(start_of_slab) == meta->meta_common.chunk_address());
#if defined(__CHERI_PURE_CAPABILITY__) && !defined(SNMALLOC_CHECK_CLIENT)
// Zero the whole slab. For CHERI we at least need to clear the freelist
@@ -340,9 +337,8 @@ namespace snmalloc
// the freelist order as for SNMALLOC_CHECK_CLIENT. Zeroing the whole slab
// may be more friendly to hw because it does not involve pointer chasing
// and is amenable to prefetching.
chunk_record->meta_common
.template zero_chunk<typename SharedStateHandle::Pal>(
snmalloc::sizeclass_to_slab_size(sizeclass));
meta->meta_common.template zero_chunk<typename SharedStateHandle::Pal>(
snmalloc::sizeclass_to_slab_size(sizeclass));
#endif
#ifdef SNMALLOC_TRACING
@@ -351,7 +347,6 @@ namespace snmalloc
#else
UNUSED(start_of_slab);
#endif
return chunk_record;
}
template<bool check_slabs = false>
@@ -386,11 +381,11 @@ namespace snmalloc
// TODO: delay the clear to the next user of the slab, or to teardown, so
// we don't touch the cache lines at this point in SNMALLOC_CHECK_CLIENT.
auto chunk_record = clear_slab(meta, sizeclass);
clear_slab(meta, sizeclass);
SharedStateHandle::dealloc_chunk(
get_backend_local_state(),
chunk_record,
meta->meta_common,
sizeclass_to_slab_size(sizeclass));
return true;
@@ -422,10 +417,8 @@ namespace snmalloc
UNUSED(size);
#endif
auto slab_record = reinterpret_cast<ChunkRecord*>(meta);
SharedStateHandle::dealloc_chunk(
get_backend_local_state(), slab_record, size);
get_backend_local_state(), meta->meta_common, size);
return;
}

View File

@@ -1,11 +1,12 @@
#pragma once
#include "../backend/chunkallocator.h"
#include "../ds/flaglock.h"
#include "../ds/mpmcstack.h"
#include "../pal/pal_concept.h"
#include "pooled.h"
#include <new>
namespace snmalloc
{
/**
@@ -132,15 +133,17 @@ namespace snmalloc
return p;
}
p = ChunkAllocator::alloc_meta_data<T, SharedStateHandle>(
nullptr, std::forward<Args>(args)...);
auto raw =
SharedStateHandle::template alloc_meta_data<T>(nullptr, sizeof(T));
if (p == nullptr)
if (raw == nullptr)
{
SharedStateHandle::Pal::error(
"Failed to initialise thread local allocator.");
}
p = new (raw.unsafe_ptr()) T(std::forward<Args>(args)...);
FlagLock f(pool.lock);
p->list_next = pool.list;
pool.list = p;
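The replacement path in the pool above — obtain raw meta-data storage from the backend, then construct the pooled object with placement new over it — follows the usual pattern sketched below. This is an illustration only: construct_from_raw is a hypothetical helper, std::malloc stands in for SharedStateHandle::alloc_meta_data, and over-alignment is ignored for brevity.

#include <cstdlib>
#include <new>
#include <utility>

template<typename T, typename... Args>
T* construct_from_raw(Args&&... args)
{
  // The backend would hand back a capptr::Chunk<void> here instead.
  void* raw = std::malloc(sizeof(T));
  if (raw == nullptr)
    return nullptr; // the pool above instead calls Pal::error(...)
  // Construct T in place, forwarding the caller's arguments.
  return new (raw) T(std::forward<Args>(args)...);
}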

View File

@@ -24,8 +24,6 @@ namespace snmalloc
private:
using Backend = BackendAllocator<Pal, false>;
SNMALLOC_REQUIRE_CONSTINIT
inline static ChunkAllocatorState chunk_allocator_state;
SNMALLOC_REQUIRE_CONSTINIT
inline static GlobalPoolState alloc_pool;
@@ -46,12 +44,6 @@ namespace snmalloc
}
();
static ChunkAllocatorState&
get_chunk_allocator_state(Backend::LocalState* = nullptr)
{
return chunk_allocator_state;
}
static GlobalPoolState& pool()
{
return alloc_pool;