зеркало из https://github.com/microsoft/snmalloc.git
Startup speed (#665)
* Refactor buddy allocator Make the structure of add_block clearer by pulling out remove_buddy. * Give buddy a few elements so we don't have to touch the pagemap early on. * Only use do and dont dump on pagemap The do and dont dump calls were costing a lot during start up of snmalloc. This reduces the times they are called to only be for the pagemap.
This commit is contained in:
Родитель
835ab51863
Коммит
4620220080
|
@ -15,7 +15,13 @@ namespace snmalloc
|
|||
template<typename Rep, size_t MIN_SIZE_BITS, size_t MAX_SIZE_BITS>
|
||||
class Buddy
|
||||
{
|
||||
std::array<RBTree<Rep>, MAX_SIZE_BITS - MIN_SIZE_BITS> trees{};
|
||||
struct Entry
|
||||
{
|
||||
typename Rep::Contents cache[3];
|
||||
RBTree<Rep> tree{};
|
||||
};
|
||||
|
||||
std::array<Entry, MAX_SIZE_BITS - MIN_SIZE_BITS> entries{};
|
||||
// All RBtrees at or above this index should be empty.
|
||||
size_t empty_at_or_above{0};
|
||||
|
||||
|
@ -42,13 +48,54 @@ namespace snmalloc
|
|||
void invariant()
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
for (size_t i = empty_at_or_above; i < trees.size(); i++)
|
||||
for (size_t i = empty_at_or_above; i < entries.size(); i++)
|
||||
{
|
||||
SNMALLOC_ASSERT(trees[i].is_empty());
|
||||
SNMALLOC_ASSERT(entries[i].tree.is_empty());
|
||||
// TODO check cache is empty
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
bool remove_buddy(typename Rep::Contents addr, size_t size)
|
||||
{
|
||||
auto idx = to_index(size);
|
||||
|
||||
// Empty at this range.
|
||||
if (idx >= empty_at_or_above)
|
||||
return false;
|
||||
|
||||
auto buddy = Rep::buddy(addr, size);
|
||||
|
||||
// Check local cache first
|
||||
for (auto& e : entries[idx].cache)
|
||||
{
|
||||
if (Rep::equal(buddy, e))
|
||||
{
|
||||
if (!Rep::can_consolidate(addr, size))
|
||||
return false;
|
||||
|
||||
e = entries[idx].tree.remove_min();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
auto path = entries[idx].tree.get_root_path();
|
||||
bool contains_buddy = entries[idx].tree.find(path, buddy);
|
||||
|
||||
if (!contains_buddy)
|
||||
return false;
|
||||
|
||||
// Only check if we can consolidate after we know the buddy is in
|
||||
// the buddy allocator. This is required to prevent possible segfaults
|
||||
// from looking at the buddy's meta-data, which we only know exists
|
||||
// once we have found it in the red-black tree.
|
||||
if (!Rep::can_consolidate(addr, size))
|
||||
return false;
|
||||
|
||||
entries[idx].tree.remove_path(path);
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
constexpr Buddy() = default;
|
||||
/**
|
||||
|
@ -63,48 +110,39 @@ namespace snmalloc
|
|||
*/
|
||||
typename Rep::Contents add_block(typename Rep::Contents addr, size_t size)
|
||||
{
|
||||
validate_block(addr, size);
|
||||
|
||||
if (remove_buddy(addr, size))
|
||||
{
|
||||
// Add to next level cache
|
||||
size *= 2;
|
||||
addr = Rep::align_down(addr, size);
|
||||
if (size == bits::one_at_bit(MAX_SIZE_BITS))
|
||||
{
|
||||
// Invariant should be checked on all non-tail return paths.
|
||||
// Holds trivially here with current design.
|
||||
invariant();
|
||||
// Too big for this buddy allocator.
|
||||
return addr;
|
||||
}
|
||||
return add_block(addr, size);
|
||||
}
|
||||
|
||||
auto idx = to_index(size);
|
||||
empty_at_or_above = bits::max(empty_at_or_above, idx + 1);
|
||||
|
||||
validate_block(addr, size);
|
||||
|
||||
auto buddy = Rep::buddy(addr, size);
|
||||
|
||||
auto path = trees[idx].get_root_path();
|
||||
bool contains_buddy = trees[idx].find(path, buddy);
|
||||
|
||||
if (contains_buddy)
|
||||
for (auto& e : entries[idx].cache)
|
||||
{
|
||||
// Only check if we can consolidate after we know the buddy is in
|
||||
// the buddy allocator. This is required to prevent possible segfaults
|
||||
// from looking at the buddy's meta-data, which we only know exists
|
||||
// once we have found it in the red-black tree.
|
||||
if (Rep::can_consolidate(addr, size))
|
||||
if (Rep::equal(Rep::null, e))
|
||||
{
|
||||
trees[idx].remove_path(path);
|
||||
|
||||
// Add to next level cache
|
||||
size *= 2;
|
||||
addr = Rep::align_down(addr, size);
|
||||
if (size == bits::one_at_bit(MAX_SIZE_BITS))
|
||||
{
|
||||
// Invariant should be checked on all non-tail return paths.
|
||||
// Holds trivially here with current design.
|
||||
invariant();
|
||||
// Too big for this buddy allocator.
|
||||
return addr;
|
||||
}
|
||||
return add_block(addr, size);
|
||||
e = addr;
|
||||
return Rep::null;
|
||||
}
|
||||
|
||||
// Re-traverse as the path was to the buddy,
|
||||
// but the representation says we cannot combine.
|
||||
// We must find the correct place for this element.
|
||||
// Something clever could be done here, but it's not worth it.
|
||||
// path = trees[idx].get_root_path();
|
||||
trees[idx].find(path, addr);
|
||||
}
|
||||
trees[idx].insert_path(path, addr);
|
||||
|
||||
auto path = entries[idx].tree.get_root_path();
|
||||
entries[idx].tree.find(path, addr);
|
||||
entries[idx].tree.insert_path(path, addr);
|
||||
invariant();
|
||||
return Rep::null;
|
||||
}
|
||||
|
@ -121,7 +159,15 @@ namespace snmalloc
|
|||
if (idx >= empty_at_or_above)
|
||||
return Rep::null;
|
||||
|
||||
auto addr = trees[idx].remove_min();
|
||||
auto addr = entries[idx].tree.remove_min();
|
||||
for (auto& e : entries[idx].cache)
|
||||
{
|
||||
if (Rep::equal(Rep::null, addr) || Rep::compare(e, addr))
|
||||
{
|
||||
addr = std::exchange(e, addr);
|
||||
}
|
||||
}
|
||||
|
||||
if (addr != Rep::null)
|
||||
{
|
||||
validate_block(addr, size);
|
||||
|
|
|
@ -68,6 +68,10 @@ namespace snmalloc
|
|||
auto page_end = pointer_align_up<OS_PAGE_SIZE, char>(last);
|
||||
size_t using_size = pointer_diff(page_start, page_end);
|
||||
PAL::template notify_using<NoZero>(page_start, using_size);
|
||||
if constexpr (pal_supports<CoreDump, PAL>)
|
||||
{
|
||||
PAL::notify_do_dump(page_start, using_size);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr FlatPagemap() = default;
|
||||
|
@ -192,6 +196,12 @@ namespace snmalloc
|
|||
|
||||
auto new_body_untyped = PAL::reserve(request_size);
|
||||
|
||||
if constexpr (pal_supports<CoreDump, PAL>)
|
||||
{
|
||||
// Pagemap should not be in core dump except where it is non-zero.
|
||||
PAL::notify_do_not_dump(new_body_untyped, request_size);
|
||||
}
|
||||
|
||||
if (new_body_untyped == nullptr)
|
||||
{
|
||||
PAL::error("Failed to initialise snmalloc.");
|
||||
|
|
|
@ -55,6 +55,12 @@ namespace snmalloc
|
|||
* This Pal provides a millisecond time source
|
||||
*/
|
||||
Time = (1 << 5),
|
||||
|
||||
/**
|
||||
* This Pal provides selective core dumps, so callers can
|
||||
* modify which parts get dumped.
|
||||
*/
|
||||
CoreDump = (1 << 6),
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -43,7 +43,8 @@ namespace snmalloc
|
|||
* field is declared explicitly to remind anyone modifying this class to
|
||||
* add new features that they should add any required feature flags.
|
||||
*/
|
||||
static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features;
|
||||
static constexpr uint64_t pal_features =
|
||||
PALBSD_Aligned::pal_features | CoreDump;
|
||||
|
||||
/**
|
||||
* FreeBSD uses atypically small address spaces on its 64 bit RISC machines.
|
||||
|
@ -68,10 +69,8 @@ namespace snmalloc
|
|||
/**
|
||||
* Notify platform that we will not be using these pages.
|
||||
*
|
||||
* We use the `MADV_FREE` and `MADV_NOCORE` flags to `madvise`. The first
|
||||
* allows the system to discard the page and replace it with a CoW mapping
|
||||
* of the zero page. The second prevents this mapping from appearing in
|
||||
* core files.
|
||||
* We use the `MADV_FREE` flag to `madvise`. This allows the system to
|
||||
* discard the page and replace it with a CoW mapping of the zero page.
|
||||
*/
|
||||
static void notify_not_using(void* p, size_t size) noexcept
|
||||
{
|
||||
|
@ -80,7 +79,6 @@ namespace snmalloc
|
|||
if constexpr (DEBUG)
|
||||
memset(p, 0x5a, size);
|
||||
|
||||
madvise(p, size, MADV_NOCORE);
|
||||
madvise(p, size, MADV_FREE);
|
||||
|
||||
if constexpr (mitigations(pal_enforce_access))
|
||||
|
@ -90,28 +88,19 @@ namespace snmalloc
|
|||
}
|
||||
|
||||
/**
|
||||
* Notify platform that we will be using these pages for reading.
|
||||
*
|
||||
* This is used only for pages full of zeroes and so we exclude them from
|
||||
* core dumps.
|
||||
* Notify platform that these pages should be included in a core dump.
|
||||
*/
|
||||
static void notify_using_readonly(void* p, size_t size) noexcept
|
||||
static void notify_do_dump(void* p, size_t size) noexcept
|
||||
{
|
||||
PALBSD_Aligned<PALFreeBSD>::notify_using_readonly(p, size);
|
||||
madvise(p, size, MADV_NOCORE);
|
||||
madvise(p, size, MADV_CORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify platform that we will be using these pages.
|
||||
*
|
||||
* We may have previously marked this memory as not being included in core
|
||||
* files, so mark it for inclusion again.
|
||||
* Notify platform that these pages should not be included in a core dump.
|
||||
*/
|
||||
template<ZeroMem zero_mem>
|
||||
static void notify_using(void* p, size_t size) noexcept
|
||||
static void notify_do_not_dump(void* p, size_t size) noexcept
|
||||
{
|
||||
PALBSD_Aligned<PALFreeBSD>::notify_using<zero_mem>(p, size);
|
||||
madvise(p, size, MADV_CORE);
|
||||
madvise(p, size, MADV_NOCORE);
|
||||
}
|
||||
|
||||
# if defined(__CHERI_PURE_CAPABILITY__)
|
||||
|
|
|
@ -27,7 +27,8 @@ namespace snmalloc
|
|||
*
|
||||
* We always make sure that linux has entropy support.
|
||||
*/
|
||||
static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy;
|
||||
static constexpr uint64_t pal_features =
|
||||
PALPOSIX::pal_features | Entropy | CoreDump;
|
||||
|
||||
static constexpr size_t page_size =
|
||||
Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size;
|
||||
|
@ -58,7 +59,6 @@ namespace snmalloc
|
|||
void* p = PALPOSIX<PALLinux>::reserve(size);
|
||||
if (p)
|
||||
{
|
||||
madvise(p, size, MADV_DONTDUMP);
|
||||
# ifdef SNMALLOC_PAGEID
|
||||
# ifndef PR_SET_VMA
|
||||
# define PR_SET_VMA 0x53564d41
|
||||
|
@ -125,7 +125,6 @@ namespace snmalloc
|
|||
if constexpr (DEBUG)
|
||||
memset(p, 0x5a, size);
|
||||
|
||||
madvise(p, size, MADV_DONTDUMP);
|
||||
madvise(p, size, madvise_free_flags);
|
||||
|
||||
if constexpr (mitigations(pal_enforce_access))
|
||||
|
@ -135,25 +134,19 @@ namespace snmalloc
|
|||
}
|
||||
|
||||
/**
|
||||
* Notify platform that we will be using these pages for reading.
|
||||
*
|
||||
* This is used only for pages full of zeroes and so we exclude them from
|
||||
* core dumps.
|
||||
* Notify platform that these pages should be included in a core dump.
|
||||
*/
|
||||
static void notify_using_readonly(void* p, size_t size) noexcept
|
||||
static void notify_do_dump(void* p, size_t size) noexcept
|
||||
{
|
||||
PALPOSIX<PALLinux>::notify_using_readonly(p, size);
|
||||
madvise(p, size, MADV_DONTDUMP);
|
||||
madvise(p, size, MADV_DODUMP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify platform that we will be using these pages.
|
||||
* Notify platform that these pages should not be included in a core dump.
|
||||
*/
|
||||
template<ZeroMem zero_mem>
|
||||
static void notify_using(void* p, size_t size) noexcept
|
||||
static void notify_do_not_dump(void* p, size_t size) noexcept
|
||||
{
|
||||
PALPOSIX<PALLinux>::notify_using<zero_mem>(p, size);
|
||||
madvise(p, size, MADV_DODUMP);
|
||||
madvise(p, size, MADV_DONTDUMP);
|
||||
}
|
||||
|
||||
static uint64_t get_entropy64()
|
||||
|
|
Загрузка…
Ссылка в новой задаче