* Refactor buddy allocator

Make the structure of add_block clearer by pulling out remove_buddy.

* Give buddy a few cache elements so we don't have to touch the pagemap early on.

* Only use do and dont dump on pagemap

The do and dont dump calls were costing a lot during start up of snmalloc.  This reduces the number of times they are called to only be for the pagemap.
This commit is contained in:
Matthew Parkinson 2024-06-26 21:34:22 +01:00 коммит произвёл GitHub
Родитель 835ab51863
Коммит 4620220080
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
5 изменённых файлов: 119 добавлений и 75 удалений

Просмотреть файл

@ -15,7 +15,13 @@ namespace snmalloc
template<typename Rep, size_t MIN_SIZE_BITS, size_t MAX_SIZE_BITS>
class Buddy
{
std::array<RBTree<Rep>, MAX_SIZE_BITS - MIN_SIZE_BITS> trees{};
struct Entry
{
typename Rep::Contents cache[3];
RBTree<Rep> tree{};
};
std::array<Entry, MAX_SIZE_BITS - MIN_SIZE_BITS> entries{};
// All RBtrees at or above this index should be empty.
size_t empty_at_or_above{0};
@ -42,13 +48,54 @@ namespace snmalloc
/**
 * Debug-only consistency check: every entry at or above
 * `empty_at_or_above` must have an empty tree.
 *
 * NOTE: the pasted diff had lost its +/- markers, leaving both the old
 * (`trees`) and new (`entries`) loop/assert lines interleaved; this is
 * the post-refactor version, iterating over `entries`.
 */
void invariant()
{
#ifndef NDEBUG
  for (size_t i = empty_at_or_above; i < entries.size(); i++)
  {
    SNMALLOC_ASSERT(entries[i].tree.is_empty());
    // TODO check cache is empty
  }
#endif
}
/**
 * If the buddy of the block [addr, addr + size) is held by this
 * allocator and Rep permits consolidation, remove it and return true
 * so the caller can merge the two blocks into one of double the size.
 * Returns false if the buddy is absent or cannot be consolidated.
 */
bool remove_buddy(typename Rep::Contents addr, size_t size)
{
auto idx = to_index(size);
// Empty at this range.
if (idx >= empty_at_or_above)
return false;
auto buddy = Rep::buddy(addr, size);
// Check local cache first
for (auto& e : entries[idx].cache)
{
if (Rep::equal(buddy, e))
{
if (!Rep::can_consolidate(addr, size))
return false;
// Buddy found in the cache: consume the slot and refill it from the
// tree (remove_min presumably yields Rep::null when the tree is
// empty — TODO confirm against RBTree).
e = entries[idx].tree.remove_min();
return true;
}
}
auto path = entries[idx].tree.get_root_path();
bool contains_buddy = entries[idx].tree.find(path, buddy);
if (!contains_buddy)
return false;
// Only check if we can consolidate after we know the buddy is in
// the buddy allocator. This is required to prevent possible segfaults
// from looking at the buddies meta-data, which we only know exists
// once we have found it in the red-black tree.
if (!Rep::can_consolidate(addr, size))
return false;
entries[idx].tree.remove_path(path);
return true;
}
public:
constexpr Buddy() = default;
/**
@ -63,48 +110,39 @@ namespace snmalloc
*/
/**
 * Add a block of the given size to the allocator, consolidating with
 * its buddy where possible.
 *
 * Returns Rep::null if the block was absorbed; otherwise returns the
 * (aligned) address of a block that grew to MAX_SIZE_BITS and is too
 * big for this allocator, which the caller must handle.
 *
 * NOTE: the pasted diff had lost its +/- markers, interleaving the old
 * tree-only implementation with the new cache-first one; this is the
 * post-refactor version using remove_buddy and the per-size cache.
 */
typename Rep::Contents add_block(typename Rep::Contents addr, size_t size)
{
  validate_block(addr, size);

  if (remove_buddy(addr, size))
  {
    // Buddy was removed: merge into a block of twice the size.
    size *= 2;
    addr = Rep::align_down(addr, size);
    if (size == bits::one_at_bit(MAX_SIZE_BITS))
    {
      // Invariant should be checked on all non-tail return paths.
      // Holds trivially here with current design.
      invariant();
      // Too big for this buddy allocator.
      return addr;
    }
    return add_block(addr, size);
  }

  auto idx = to_index(size);
  empty_at_or_above = bits::max(empty_at_or_above, idx + 1);

  // Prefer a free slot in the small per-size cache so we avoid touching
  // the red-black tree (and hence block meta-data) early on.
  for (auto& e : entries[idx].cache)
  {
    if (Rep::equal(Rep::null, e))
    {
      e = addr;
      return Rep::null;
    }
  }

  // Cache full: insert into the tree for this size class.
  auto path = entries[idx].tree.get_root_path();
  entries[idx].tree.find(path, addr);
  entries[idx].tree.insert_path(path, addr);
  invariant();
  return Rep::null;
}
@ -121,7 +159,15 @@ namespace snmalloc
if (idx >= empty_at_or_above)
return Rep::null;
auto addr = trees[idx].remove_min();
auto addr = entries[idx].tree.remove_min();
for (auto& e : entries[idx].cache)
{
if (Rep::equal(Rep::null, addr) || Rep::compare(e, addr))
{
addr = std::exchange(e, addr);
}
}
if (addr != Rep::null)
{
validate_block(addr, size);

Просмотреть файл

@ -68,6 +68,10 @@ namespace snmalloc
auto page_end = pointer_align_up<OS_PAGE_SIZE, char>(last);
size_t using_size = pointer_diff(page_start, page_end);
PAL::template notify_using<NoZero>(page_start, using_size);
if constexpr (pal_supports<CoreDump, PAL>)
{
PAL::notify_do_dump(page_start, using_size);
}
}
constexpr FlatPagemap() = default;
@ -192,6 +196,12 @@ namespace snmalloc
auto new_body_untyped = PAL::reserve(request_size);
if constexpr (pal_supports<CoreDump, PAL>)
{
// Pagemap should not be in core dump except where it is non-zero.
PAL::notify_do_not_dump(new_body_untyped, request_size);
}
if (new_body_untyped == nullptr)
{
PAL::error("Failed to initialise snmalloc.");

Просмотреть файл

@ -55,6 +55,12 @@ namespace snmalloc
* This Pal provides a millisecond time source
*/
Time = (1 << 5),
/**
* This Pal provides selective core dumps, so
* modify which parts get dumped.
*/
CoreDump = (1 << 6),
};
/**

Просмотреть файл

@ -43,7 +43,8 @@ namespace snmalloc
* field is declared explicitly to remind anyone modifying this class to
* add new features that they should add any required feature flags.
*/
static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features;
static constexpr uint64_t pal_features =
PALBSD_Aligned::pal_features | CoreDump;
/**
* FreeBSD uses atypically small address spaces on its 64 bit RISC machines.
@ -68,10 +69,8 @@ namespace snmalloc
/**
* Notify platform that we will not be using these pages.
*
* We use the `MADV_FREE` and `NADV_NOCORE` flags to `madvise`. The first
* allows the system to discard the page and replace it with a CoW mapping
* of the zero page. The second prevents this mapping from appearing in
* core files.
* We use the `MADV_FREE` flag to `madvise`. This allows the system to
* discard the page and replace it with a CoW mapping of the zero page.
*/
static void notify_not_using(void* p, size_t size) noexcept
{
@ -80,7 +79,6 @@ namespace snmalloc
if constexpr (DEBUG)
memset(p, 0x5a, size);
madvise(p, size, MADV_NOCORE);
madvise(p, size, MADV_FREE);
if constexpr (mitigations(pal_enforce_access))
@ -90,28 +88,19 @@ namespace snmalloc
}
/**
* Notify platform that we will be using these pages for reading.
*
* This is used only for pages full of zeroes and so we exclude them from
* core dumps.
* Notify platform that these pages should be included in a core dump.
*/
static void notify_using_readonly(void* p, size_t size) noexcept
static void notify_do_dump(void* p, size_t size) noexcept
{
PALBSD_Aligned<PALFreeBSD>::notify_using_readonly(p, size);
madvise(p, size, MADV_NOCORE);
madvise(p, size, MADV_CORE);
}
/**
* Notify platform that we will be using these pages.
*
* We may have previously marked this memory as not being included in core
* files, so mark it for inclusion again.
* Notify platform that these pages should not be included in a core dump.
*/
template<ZeroMem zero_mem>
static void notify_using(void* p, size_t size) noexcept
static void notify_do_not_dump(void* p, size_t size) noexcept
{
PALBSD_Aligned<PALFreeBSD>::notify_using<zero_mem>(p, size);
madvise(p, size, MADV_CORE);
madvise(p, size, MADV_NOCORE);
}
# if defined(__CHERI_PURE_CAPABILITY__)

Просмотреть файл

@ -27,7 +27,8 @@ namespace snmalloc
*
* We always make sure that linux has entropy support.
*/
static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy;
static constexpr uint64_t pal_features =
PALPOSIX::pal_features | Entropy | CoreDump;
static constexpr size_t page_size =
Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size;
@ -58,7 +59,6 @@ namespace snmalloc
void* p = PALPOSIX<PALLinux>::reserve(size);
if (p)
{
madvise(p, size, MADV_DONTDUMP);
# ifdef SNMALLOC_PAGEID
# ifndef PR_SET_VMA
# define PR_SET_VMA 0x53564d41
@ -125,7 +125,6 @@ namespace snmalloc
if constexpr (DEBUG)
memset(p, 0x5a, size);
madvise(p, size, MADV_DONTDUMP);
madvise(p, size, madvise_free_flags);
if constexpr (mitigations(pal_enforce_access))
@ -135,25 +134,19 @@ namespace snmalloc
}
/**
 * Notify platform that these pages should be included in a core dump.
 *
 * Undoes a prior MADV_DONTDUMP on this range.
 * (Reconstructed post-change version: the pasted diff interleaved the
 * removed notify_using_readonly with this new method.)
 */
static void notify_do_dump(void* p, size_t size) noexcept
{
  madvise(p, size, MADV_DODUMP);
}
/**
 * Notify platform that these pages should not be included in a core dump.
 *
 * (Reconstructed post-change version: the pasted diff interleaved the
 * removed notify_using template with this new method.)
 */
static void notify_do_not_dump(void* p, size_t size) noexcept
{
  madvise(p, size, MADV_DONTDUMP);
}
static uint64_t get_entropy64()