Mirror of https://github.com/microsoft/CCF.git
snmalloc is unused at the moment; it will be picked up again through openenclave once it has been added there (#563)
Parent: 4aa499e7d4
Commit: e66f5ce7b3
@@ -1,178 +0,0 @@
cmake_minimum_required(VERSION 3.8)
project(snmalloc C CXX)

option(USE_SNMALLOC_STATS "Track allocation stats" OFF)
option(USE_MEASURE "Measure performance with histograms" OFF)
option(EXPOSE_EXTERNAL_PAGEMAP "Expose the global pagemap" OFF)
option(EXPOSE_EXTERNAL_RESERVE "Expose an interface to reserve memory using the default memory provider" OFF)

set(CACHE_FRIENDLY_OFFSET OFF CACHE STRING "Base offset to place linked-list nodes.")

# Provide as macro so other projects can reuse
macro(warnings_high)
  if(MSVC)
    # Force to always compile with W4
    if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
      string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    else()
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
    endif()
    add_compile_options(/WX /wd4127 /wd4324 /wd4201)
  else()
    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
      add_compile_options(-Wsign-conversion)
    endif ()
    add_compile_options(-Wall -Wextra -Werror)
  endif()
endmacro()

# The main target for snmalloc
add_library(snmalloc_lib INTERFACE)
target_include_directories(snmalloc_lib INTERFACE src/)
if(NOT MSVC)
  find_package(Threads REQUIRED COMPONENTS snmalloc_lib)
  target_link_libraries(snmalloc_lib INTERFACE ${CMAKE_THREAD_LIBS_INIT})
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    target_link_libraries(snmalloc_lib INTERFACE atomic)
  endif()
  target_compile_options(snmalloc_lib INTERFACE -mcx16)
else()
  set(WIN8COMPAT FALSE CACHE BOOL "Avoid Windows 10 APIs")
  if (WIN8COMPAT)
    target_compile_definitions(snmalloc_lib INTERFACE -DWINVER=0x0603)
    message(STATUS "snmalloc: Avoiding Windows 10 APIs")
  else()
    message(STATUS "snmalloc: Using Windows 10 APIs")
    # VirtualAlloc2 is exposed by mincore.lib, not Kernel32.lib (as the
    # documentation says)
    target_link_libraries(snmalloc_lib INTERFACE mincore)
  endif()
endif()

# Have to set this globally, as can't be set on an interface target.
set(CMAKE_CXX_STANDARD 17)

if(USE_SNMALLOC_STATS)
  target_compile_definitions(snmalloc_lib INTERFACE -DUSE_SNMALLOC_STATS)
endif()

if(USE_MEASURE)
  target_compile_definitions(snmalloc_lib INTERFACE -DUSE_MEASURE)
endif()

# To build with just the header library target, define
# SNMALLOC_ONLY_HEADER_LIBRARY in the containing CMake file.
if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)

  warnings_high()

  if(MSVC)
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG")
  else()
    add_compile_options(-march=native -fno-exceptions -fno-rtti -g)
  endif()

  macro(subdirlist result curdir)
    file(GLOB children LIST_DIRECTORIES true RELATIVE ${curdir} ${curdir}/*)
    set(dirlist "")
    foreach(child ${children})
      if(IS_DIRECTORY ${curdir}/${child})
        list(APPEND dirlist ${child})
      endif()
    endforeach()
    set(${result} ${dirlist})
  endmacro()

  macro(add_shim name)
    add_library(${name} SHARED src/override/malloc.cc)
    target_link_libraries(${name} snmalloc_lib)
    target_compile_definitions(${name} PRIVATE "SNMALLOC_EXPORT=__attribute__((visibility(\"default\")))")
    set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden)

    if(CACHE_FRIENDLY_OFFSET)
      target_compile_definitions(${name} PRIVATE -DCACHE_FRIENDLY_OFFSET=${CACHE_FRIENDLY_OFFSET})
    endif()

    if(EXPOSE_EXTERNAL_PAGEMAP)
      target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_PAGEMAP)
    endif()

    if(EXPOSE_EXTERNAL_RESERVE)
      target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_RESERVE)
    endif()
  endmacro()

  if(NOT MSVC)
    add_shim(snmallocshim)

    add_shim(snmallocshim-1mib)
    target_compile_definitions(snmallocshim-1mib PRIVATE IS_ADDRESS_SPACE_CONSTRAINED)
  endif()

  enable_testing()

  set(TESTDIR ${CMAKE_CURRENT_SOURCE_DIR}/src/test)
  subdirlist(TEST_CATEGORIES ${TESTDIR})
  list(REVERSE TEST_CATEGORIES)
  foreach(TEST_CATEGORY ${TEST_CATEGORIES})
    subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY})
    foreach(TEST ${TESTS})
      foreach(SUPER_SLAB_SIZE 1;16)
        unset(SRC)
        aux_source_directory(${TESTDIR}/${TEST_CATEGORY}/${TEST} SRC)
        set(TESTNAME "${TEST_CATEGORY}-${TEST}-${SUPER_SLAB_SIZE}")

        add_executable(${TESTNAME} ${SRC} src/override/new.cc)
        if (${SUPER_SLAB_SIZE} EQUAL 1)
          target_compile_definitions(${TESTNAME} PRIVATE IS_ADDRESS_SPACE_CONSTRAINED)
        endif()
        target_include_directories(${TESTNAME} PRIVATE src)
        target_link_libraries(${TESTNAME} snmalloc_lib)
        if (${TEST} MATCHES "release-.*")
          message(STATUS "Adding test: ${TESTNAME} only for release configs")
          add_test(NAME ${TESTNAME} COMMAND ${TESTNAME} CONFIGURATIONS "Release")
        else()
          message(STATUS "Adding test: ${TESTNAME}")
          add_test(${TESTNAME} ${TESTNAME})
        endif()
        if (${TEST_CATEGORY} MATCHES "perf")
          set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 3)
        endif()
      endforeach()
    endforeach()
  endforeach()

  # The clang-format tool is installed under a variety of different names.
  # Try to find a sensible one. Only look for 6.0 and 7.0 versions
  # explicitly - we don't know whether our clang-format file will work with
  # newer versions of the tool.
  set(CLANG_FORMAT_NAMES
    clang-format-7.0
    clang-format-6.0
    clang-format70
    clang-format60
    clang-format)

  # Loop over each of the possible names of clang-format and try to find one.
  set(CLANG_FORMAT CLANG_FORMAT-NOTFOUND)
  foreach (NAME IN ITEMS ${CLANG_FORMAT_NAMES})
    if (${CLANG_FORMAT} STREQUAL "CLANG_FORMAT-NOTFOUND")
      find_program(CLANG_FORMAT ${NAME})
    endif ()
  endforeach()

  # If we've found a clang-format tool, generate a target for it, otherwise
  # emit a warning.
  if (${CLANG_FORMAT} STREQUAL "CLANG_FORMAT-NOTFOUND")
    message(WARNING "Not generating clangformat target, no clang-format tool found")
  else ()
    message(STATUS "Generating clangformat target using ${CLANG_FORMAT}")
    file(GLOB_RECURSE ALL_SOURCE_FILES *.cc *.h *.hh)
    add_custom_target(
      clangformat
      COMMAND ${CLANG_FORMAT}
      -i
      ${ALL_SOURCE_FILES})
  endif()
endif()
@@ -1,21 +0,0 @@
MIT License

Copyright (c) Microsoft Corporation. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,113 +0,0 @@
#pragma once

#include "bits.h"

namespace snmalloc
{
  template<typename T, Construction c = RequiresInit>
  class ABA
  {
  public:
#ifdef PLATFORM_IS_X86
    struct alignas(2 * sizeof(std::size_t)) Linked
    {
      T* ptr;
      uintptr_t aba;
    };

    struct Independent
    {
      std::atomic<T*> ptr;
      std::atomic<uintptr_t> aba;
    };

    static_assert(
      sizeof(Linked) == sizeof(Independent),
      "Expecting identical struct sizes in union");
    static_assert(
      sizeof(Linked) == (2 * sizeof(std::size_t)),
      "Expecting ABA to be the size of two pointers");

    using Cmp = Linked;
#else
    using Cmp = T*;
#endif

  private:
#ifdef PLATFORM_IS_X86
    union
    {
      alignas(2 * sizeof(std::size_t)) std::atomic<Linked> linked;
      Independent independent;
    };
#else
    std::atomic<T*> ptr;
#endif

  public:
    ABA()
    {
      if constexpr (c == RequiresInit)
        init(nullptr);
    }

    void init(T* x)
    {
#ifdef PLATFORM_IS_X86
      independent.ptr.store(x, std::memory_order_relaxed);
      independent.aba.store(0, std::memory_order_relaxed);
#else
      ptr.store(x, std::memory_order_relaxed);
#endif
    }

    T* peek()
    {
      return independent.ptr.load(std::memory_order_relaxed);
    }

    Cmp read()
    {
      return
#ifdef PLATFORM_IS_X86
        Cmp{independent.ptr.load(std::memory_order_relaxed),
            independent.aba.load(std::memory_order_relaxed)};
#else
        ptr.load(std::memory_order_relaxed);
#endif
    }

    static T* load(Cmp& from)
    {
#ifdef PLATFORM_IS_X86
      return from.ptr;
#else
      return from;
#endif
    }

    bool compare_exchange(Cmp& expect, T* value)
    {
#ifdef PLATFORM_IS_X86
#  if defined(_MSC_VER) && defined(PLATFORM_BITS_64)
      return _InterlockedCompareExchange128(
        (volatile __int64*)&linked,
        expect.aba + 1,
        (__int64)value,
        (__int64*)&expect);
#  else
#    if defined(__GNUC__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
#      error You must compile with -mcx16 to enable 16-byte atomic compare and swap.
#    endif
      Cmp xchg{value, expect.aba + 1};

      return linked.compare_exchange_weak(
        expect, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
#  endif
#else
      return ptr.compare_exchange_weak(
        expect, value, std::memory_order_relaxed, std::memory_order_relaxed);
#endif
    }
  };
} // namespace snmalloc
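The versioned pointer above is what defends lock-free pops against the classic ABA race. A rough sketch of the idea, not part of the removed tree (the node and variable names are invented): a bare pointer CAS can succeed even if the head was popped and re-pushed to the same address in between, whereas pairing the pointer with a counter makes the stale comparison fail.

#include <atomic>
#include <cstdint>

struct Node { Node* next; };

// Compare {pointer, counter} together, as ABA does on x86. A concurrent
// pop/push that restores the same head pointer still bumps the counter,
// so a CAS using a stale snapshot fails and the loop retries.
struct alignas(2 * sizeof(void*)) Head
{
  Node* ptr;
  uintptr_t aba;
};

std::atomic<Head> head{Head{nullptr, 0}}; // lock-free with -mcx16 on GCC/Clang

void push(Node* n)
{
  Head expected = head.load(std::memory_order_relaxed);
  Head desired;
  do
  {
    n->next = expected.ptr;
    desired = {n, expected.aba + 1};
  } while (!head.compare_exchange_weak(
    expected, desired, std::memory_order_release, std::memory_order_relaxed));
}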
@@ -1,42 +0,0 @@
#pragma once
#include <cstdint>

namespace snmalloc
{
  /**
   * The type used for an address. Currently, all addresses are assumed to
   * be provenance-carrying values and so it is possible to cast back from
   * the result of arithmetic on an address_t. Eventually, this will want to
   * be separated into two types, one for raw addresses and one for
   * addresses that can be cast back to pointers.
   */
  using address_t = uintptr_t;

  /**
   * Perform pointer arithmetic and return the adjusted pointer.
   */
  template<typename T>
  inline T* pointer_offset(T* base, size_t diff)
  {
    return reinterpret_cast<T*>(reinterpret_cast<char*>(base) + diff);
  }

  /**
   * Cast from a pointer type to an address.
   */
  template<typename T>
  inline address_t address_cast(T* ptr)
  {
    return reinterpret_cast<address_t>(ptr);
  }

  /**
   * Cast from an address back to a pointer of the specified type. All uses
   * of this will eventually need auditing for CHERI compatibility.
   */
  template<typename T>
  inline T* pointer_cast(address_t address)
  {
    return reinterpret_cast<T*>(address);
  }
} // namespace snmalloc
@@ -1,477 +0,0 @@
#pragma once

#include <limits>
#include <stddef.h>

#ifdef _MSC_VER
#  include <immintrin.h>
#  include <intrin.h>
#  define ALWAYSINLINE __forceinline
#  define NOINLINE __declspec(noinline)
#  define HEADER_GLOBAL __declspec(selectany)
#  define likely(x) !!(x)
#  define unlikely(x) !!(x)
#else
#  define likely(x) __builtin_expect(!!(x), 1)
#  define unlikely(x) __builtin_expect(!!(x), 0)
#  include <cpuid.h>
#  include <emmintrin.h>
#  define ALWAYSINLINE __attribute__((always_inline))
#  define NOINLINE __attribute__((noinline))
#  ifdef __clang__
#    define HEADER_GLOBAL __attribute__((selectany))
#  else
// GCC does not support selectany; weak is almost the correct
// attribute, but leaves the global variable preemptible.
#    define HEADER_GLOBAL __attribute__((weak))
#  endif
#endif

#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \
  defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \
  defined(_M_AMD64)
#  define PLATFORM_IS_X86
#  if defined(__linux__) && !defined(OPEN_ENCLAVE)
#    include <x86intrin.h>
#  endif
#  if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \
    defined(_M_AMD64)
#    define PLATFORM_BITS_64
#  else
#    define PLATFORM_BITS_32
#  endif
#endif

#if defined(_MSC_VER) && defined(PLATFORM_BITS_32)
#  include <intsafe.h>
#endif

#ifndef __has_builtin
#  define __has_builtin(x) 0
#endif

#define UNUSED(x) ((void)(x))

#if __has_builtin(__builtin_assume)
#  define SNMALLOC_ASSUME(x) __builtin_assume(x)
#else
#  define SNMALLOC_ASSUME(x) \
    do \
    { \
    } while (0)
#endif

// #define USE_LZCNT

#include "address.h"

#include <atomic>
#include <cassert>
#include <cstdint>
#include <type_traits>

#ifdef pause
#  undef pause
#endif

namespace snmalloc
{
  // Used to enable trivial constructors for
  // classes where zero init is sufficient.
  // Supplying PreZeroed means the memory is pre-zeroed, i.e. a global
  // section. RequiresInit is for classes that need to zero their fields.
  enum Construction
  {
    PreZeroed,
    RequiresInit
  };

  namespace bits
  {
    static constexpr size_t BITS = sizeof(size_t) * 8;

    static constexpr bool is64()
    {
      return BITS == 64;
    }

    /**
     * Returns a value of type T that has a single bit set.
     *
     * S is a template parameter because callers use either `int` or `size_t`
     * and either is valid to represent a number in the range 0-63 (or 0-127
     * if we want to use `__uint128_t` as `T`).
     */
    template<typename T = size_t, typename S>
    constexpr T one_at_bit(S shift)
    {
      static_assert(std::is_integral_v<T>, "Type must be integral");
      return (static_cast<T>(1)) << shift;
    }

    static constexpr size_t ADDRESS_BITS = is64() ? 48 : 32;

    inline void pause()
    {
#if defined(PLATFORM_IS_X86)
      _mm_pause();
#else
#  warning "Missing pause intrinsic"
#endif
    }

    inline uint64_t tick()
    {
#if defined(PLATFORM_IS_X86)
#  if defined(_MSC_VER)
      return __rdtsc();
#  elif defined(__clang__)
      return __builtin_readcyclecounter();
#  else
      return __builtin_ia32_rdtsc();
#  endif
#else
#  error Define CPU tick for this platform
#endif
    }

    inline uint64_t tickp()
    {
#if defined(PLATFORM_IS_X86)
#  if defined(_MSC_VER)
      unsigned int aux;
      return __rdtscp(&aux);
#  else
      unsigned aux;
      return __builtin_ia32_rdtscp(&aux);
#  endif
#else
#  error Define CPU tick for this platform
#endif
    }

    inline void halt_out_of_order()
    {
#if defined(PLATFORM_IS_X86)
#  if defined(_MSC_VER)
      int cpu_info[4];
      __cpuid(cpu_info, 0);
#  else
      unsigned int eax, ebx, ecx, edx;
      __get_cpuid(0, &eax, &ebx, &ecx, &edx);
#  endif
#else
#  error Define CPU benchmark start time for this platform
#endif
    }

    inline uint64_t benchmark_time_start()
    {
      halt_out_of_order();
      return tick();
    }

    inline uint64_t benchmark_time_end()
    {
      uint64_t t = tickp();
      halt_out_of_order();
      return t;
    }

    inline size_t clz(size_t x)
    {
#if defined(_MSC_VER)
#  ifdef USE_LZCNT
#    ifdef PLATFORM_BITS_64
      return __lzcnt64(x);
#    else
      return __lzcnt((uint32_t)x);
#    endif
#  else
      unsigned long index;

#    ifdef PLATFORM_BITS_64
      _BitScanReverse64(&index, x);
#    else
      _BitScanReverse(&index, (unsigned long)x);
#    endif

      return BITS - index - 1;
#  endif
#else
      return static_cast<size_t>(__builtin_clzl(x));
#endif
    }

    inline constexpr size_t rotr_const(size_t x, size_t n)
    {
      size_t nn = n & (BITS - 1);
      return (x >> nn) |
        (x << ((static_cast<size_t>(-static_cast<int>(nn))) & (BITS - 1)));
    }

    inline constexpr size_t rotl_const(size_t x, size_t n)
    {
      size_t nn = n & (BITS - 1);
      return (x << nn) |
        (x >> ((static_cast<size_t>(-static_cast<int>(nn))) & (BITS - 1)));
    }

    inline size_t rotr(size_t x, size_t n)
    {
#if defined(_MSC_VER)
#  ifdef PLATFORM_BITS_64
      return _rotr64(x, (int)n);
#  else
      return _rotr((uint32_t)x, (int)n);
#  endif
#else
      return rotr_const(x, n);
#endif
    }

    inline size_t rotl(size_t x, size_t n)
    {
#if defined(_MSC_VER)
#  ifdef PLATFORM_BITS_64
      return _rotl64(x, (int)n);
#  else
      return _rotl((uint32_t)x, (int)n);
#  endif
#else
      return rotl_const(x, n);
#endif
    }

    constexpr size_t clz_const(size_t x)
    {
      size_t n = 0;

      for (int i = BITS - 1; i >= 0; i--)
      {
        size_t mask = one_at_bit(i);

        if ((x & mask) == mask)
          return n;

        n++;
      }

      return n;
    }

    inline size_t ctz(size_t x)
    {
#if defined(_MSC_VER)
#  ifdef PLATFORM_BITS_64
      return _tzcnt_u64(x);
#  else
      return _tzcnt_u32((uint32_t)x);
#  endif
#else
      return static_cast<size_t>(__builtin_ctzl(x));
#endif
    }

    constexpr size_t ctz_const(size_t x)
    {
      size_t n = 0;

      for (size_t i = 0; i < BITS; i++)
      {
        size_t mask = one_at_bit(i);

        if ((x & mask) == mask)
          return n;

        n++;
      }

      return n;
    }

    inline size_t umul(size_t x, size_t y, bool& overflow)
    {
#if __has_builtin(__builtin_mul_overflow)
      size_t prod;
      overflow = __builtin_mul_overflow(x, y, &prod);
      return prod;
#elif defined(_MSC_VER)
#  if defined(PLATFORM_BITS_64)
      size_t high_prod;
      size_t prod = _umul128(x, y, &high_prod);
      overflow = high_prod != 0;
      return prod;
#  else
      size_t prod;
      overflow = S_OK != UIntMult(x, y, &prod);
      return prod;
#  endif
#else
      size_t prod = x * y;
      overflow = y && (x > ((size_t)-1 / y));
      return prod;
#endif
    }

    inline size_t next_pow2(size_t x)
    {
      // Correct for numbers [0..MAX_SIZE >> 1).
      // Returns 1 for x > (MAX_SIZE >> 1).
      if (x <= 2)
        return x;

      return one_at_bit(BITS - clz(x - 1));
    }

    inline size_t next_pow2_bits(size_t x)
    {
      // Correct for numbers [1..MAX_SIZE].
      // Returns 64 for 0. Approximately 2 cycles.
      return BITS - clz(x - 1);
    }

    constexpr size_t next_pow2_const(size_t x)
    {
      if (x <= 2)
        return x;

      return one_at_bit(BITS - clz_const(x - 1));
    }

    constexpr size_t next_pow2_bits_const(size_t x)
    {
      return BITS - clz_const(x - 1);
    }

    static inline size_t align_down(size_t value, size_t alignment)
    {
      assert(next_pow2(alignment) == alignment);

      size_t align_1 = alignment - 1;
      value &= ~align_1;
      return value;
    }

    static inline size_t align_up(size_t value, size_t alignment)
    {
      assert(next_pow2(alignment) == alignment);

      size_t align_1 = alignment - 1;
      value += align_1;
      value &= ~align_1;
      return value;
    }

    template<size_t alignment>
    static inline bool is_aligned_block(void* p, size_t size)
    {
      assert(next_pow2(alignment) == alignment);

      return ((static_cast<size_t>(address_cast(p)) | size) &
              (alignment - 1)) == 0;
    }

    /************************************************
     *
     * Map large range of strictly positive integers
     * into an exponent and mantissa pair.
     *
     * The reverse mapping is given by first adding one to the value, and
     * then extracting the bottom MANTISSA bits as m, and the rest as e.
     * Then each value maps as:
     *
     *  e |     m      |    value
     *  ---------------------------------
     *  0 | x1 ... xm  | 0..00 x1 .. xm
     *  1 | x1 ... xm  | 0..01 x1 .. xm
     *  2 | x1 ... xm  | 0..1 x1 .. xm 0
     *  3 | x1 ... xm  | 0.1 x1 .. xm 00
     *
     * The forward mapping maps a value to the
     * smallest exponent and mantissa with a
     * reverse mapping not less than the value.
     *
     * The e and m in the forward mapping and reverse are not the same, and
     * the initial increment in from_exp_mant and the decrement in
     * to_exp_mant handle the different ways it is calculating and using the
     * split. This is due to the rounding of bits below the mantissa in the
     * representation, which is confusing but leads to the fastest code.
     *
     * Does not work for value=0.
     ***********************************************/
    template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
    static size_t to_exp_mant(size_t value)
    {
      size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1;
      size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;

      value = value - 1;

      size_t e =
        bits::BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT);
      size_t b = (e == 0) ? 0 : 1;
      size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;

      return (e << MANTISSA_BITS) + m;
    }

    template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
    constexpr static size_t to_exp_mant_const(size_t value)
    {
      size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1;
      size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;

      value = value - 1;

      size_t e =
        bits::BITS - MANTISSA_BITS - LOW_BITS - clz_const(value | LEADING_BIT);
      size_t b = (e == 0) ? 0 : 1;
      size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;

      return (e << MANTISSA_BITS) + m;
    }

    template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
    constexpr static size_t from_exp_mant(size_t m_e)
    {
      if (MANTISSA_BITS > 0)
      {
        m_e = m_e + 1;
        size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;
        size_t m = m_e & MANTISSA_MASK;
        size_t e = m_e >> MANTISSA_BITS;
        size_t b = e == 0 ? 0 : 1;
        size_t shifted_e = e - b;
        size_t extended_m = (m + (b << MANTISSA_BITS));
        return extended_m << (shifted_e + LOW_BITS);
      }

      return one_at_bit(m_e + LOW_BITS);
    }

    /**
     * Implementation of `std::min`.
     *
     * `std::min` is in `<algorithm>`, so pulls in a lot of unnecessary code.
     * We write our own to reduce the code that potentially needs reviewing.
     **/
    template<typename T>
    constexpr inline T min(T t1, T t2)
    {
      return t1 < t2 ? t1 : t2;
    }

    /**
     * Implementation of `std::max`.
     *
     * `std::max` is in `<algorithm>`, so pulls in a lot of unnecessary code.
     * We write our own to reduce the code that potentially needs reviewing.
     **/
    template<typename T>
    constexpr inline T max(T t1, T t2)
    {
      return t1 > t2 ? t1 : t2;
    }
  } // namespace bits
} // namespace snmalloc
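As a small, hypothetical compile-time check of the exponent/mantissa mapping above (not in the original file): with MANTISSA_BITS = 2 the representable bucket sizes are 1..8 and then 10, 12, 14, 16, 20, ..., so the forward map rounds a request up to the next bucket and the reverse map recovers that bucket's size.

// Sketch only: exercising the to_exp_mant_const / from_exp_mant round trip.
static_assert(
  snmalloc::bits::from_exp_mant<2>(snmalloc::bits::to_exp_mant_const<2>(9)) ==
    10,
  "9 rounds up to the 10-byte bucket");
static_assert(
  snmalloc::bits::from_exp_mant<2>(snmalloc::bits::to_exp_mant_const<2>(16)) ==
    16,
  "exact bucket sizes map to themselves");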
@@ -1,55 +0,0 @@
#pragma once

#include <iostream>
#include <string>

namespace snmalloc
{
  class CSVStream
  {
  private:
    std::ostream* out;
    bool first = true;

  public:
    class Endl
    {};

    Endl endl;

    CSVStream(std::ostream* o) : out(o) {}

    void preprint()
    {
      if (!first)
      {
        *out << ", ";
      }
      else
      {
        first = false;
      }
    }

    CSVStream& operator<<(const std::string& str)
    {
      preprint();
      *out << str;
      return *this;
    }

    CSVStream& operator<<(uint64_t u)
    {
      preprint();
      *out << u;
      return *this;
    }

    CSVStream& operator<<(Endl)
    {
      *out << std::endl;
      first = true;
      return *this;
    }
  };
} // namespace snmalloc
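A minimal usage sketch (not from the tree): the stream inserts commas between fields, and `endl` ends the row and resets the comma state.

#include <cstdint>
#include <iostream>

void dump_row()
{
  snmalloc::CSVStream csv(&std::cout);
  // Prints "header, 42, 7" followed by a newline.
  csv << "header" << uint64_t(42) << uint64_t(7) << csv.endl;
}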
@@ -1,172 +0,0 @@
#pragma once

#include <cassert>
#include <cstdint>
#include <type_traits>

namespace snmalloc
{
  /**
   * Invalid pointer class. This is similar to `std::nullptr_t`, but allows
   * other values.
   */
  template<address_t Sentinel>
  struct InvalidPointer
  {
    /**
     * Equality comparison. Two invalid pointer values with the same
     * sentinel are always the same, invalid pointer values with different
     * sentinels are always different.
     */
    template<uintptr_t OtherSentinel>
    constexpr bool operator==(const InvalidPointer<OtherSentinel>&)
    {
      return Sentinel == OtherSentinel;
    }
    /**
     * Inequality comparison. Two invalid pointer values with the same
     * sentinel are always the same, invalid pointer values with different
     * sentinels are always different.
     */
    template<uintptr_t OtherSentinel>
    constexpr bool operator!=(const InvalidPointer<OtherSentinel>&)
    {
      return Sentinel != OtherSentinel;
    }
    /**
     * Implicit conversion, creates a pointer with the value of the
     * sentinel. On CHERI and other provenance-tracking systems, this is a
     * provenance-free integer and so will trap if dereferenced; on other
     * systems the sentinel should be a value in unmapped memory.
     */
    template<typename T>
    operator T*() const
    {
      return reinterpret_cast<T*>(Sentinel);
    }
    /**
     * Implicit conversion to an address, returns the sentinel value.
     */
    operator address_t() const
    {
      return Sentinel;
    }
  };

  template<class T, class Terminator = std::nullptr_t>
  class DLList
  {
  private:
    static_assert(
      std::is_same<decltype(T::prev), T*>::value, "T->prev must be a T*");
    static_assert(
      std::is_same<decltype(T::next), T*>::value, "T->next must be a T*");

    T* head = Terminator();

  public:
    bool is_empty()
    {
      return head == Terminator();
    }

    T* get_head()
    {
      return head;
    }

    T* pop()
    {
      T* item = head;

      if (item != Terminator())
        remove(item);

      return item;
    }

    void insert(T* item)
    {
#ifndef NDEBUG
      debug_check_not_contains(item);
#endif

      item->next = head;
      item->prev = Terminator();

      if (head != Terminator())
        head->prev = item;

      head = item;
#ifndef NDEBUG
      debug_check();
#endif
    }

    void remove(T* item)
    {
#ifndef NDEBUG
      debug_check_contains(item);
#endif

      if (item->next != Terminator())
        item->next->prev = item->prev;

      if (item->prev != Terminator())
        item->prev->next = item->next;
      else
        head = item->next;

#ifndef NDEBUG
      debug_check();
#endif
    }

    void debug_check_contains(T* item)
    {
#ifndef NDEBUG
      debug_check();
      T* curr = head;

      while (curr != item)
      {
        assert(curr != Terminator());
        curr = curr->next;
      }
#else
      UNUSED(item);
#endif
    }

    void debug_check_not_contains(T* item)
    {
#ifndef NDEBUG
      debug_check();
      T* curr = head;

      while (curr != Terminator())
      {
        assert(curr != item);
        curr = curr->next;
      }
#else
      UNUSED(item);
#endif
    }

    void debug_check()
    {
#ifndef NDEBUG
      T* item = head;
      T* prev = Terminator();

      while (item != Terminator())
      {
        assert(item->prev == prev);
        prev = item;
        item = item->next;
      }
#endif
    }
  };
} // namespace snmalloc
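The list is intrusive: the element type supplies its own `prev`/`next` fields, as the static assertions require. A hypothetical node type and use (not in the source):

struct Node
{
  Node* prev;
  Node* next;
};

void dllist_example()
{
  snmalloc::DLList<Node> list;
  Node a{nullptr, nullptr};
  list.insert(&a);      // head is now &a
  Node* h = list.pop(); // removes and returns &a; the list is empty again
  (void)h;
}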
@@ -1,24 +0,0 @@
#pragma once

#include "bits.h"

namespace snmalloc
{
  class FlagLock
  {
  private:
    std::atomic_flag& lock;

  public:
    FlagLock(std::atomic_flag& lock) : lock(lock)
    {
      while (lock.test_and_set(std::memory_order_acquire))
        bits::pause();
    }

    ~FlagLock()
    {
      lock.clear(std::memory_order_release);
    }
  };
} // namespace snmalloc
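FlagLock is a scoped spinlock over a caller-owned `std::atomic_flag`: the constructor spins (with `bits::pause()`) until the flag is acquired, and the destructor releases it. An illustrative use (the flag name is hypothetical):

#include <atomic>

static std::atomic_flag table_lock = ATOMIC_FLAG_INIT;

void update_shared_table()
{
  snmalloc::FlagLock guard(table_lock); // spins until the flag is clear
  // ... critical section ...
}                                       // flag cleared in the destructor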
@@ -1,90 +0,0 @@
#pragma once

#include "bits.h"
#include "flaglock.h"

namespace snmalloc
{
  /*
   * In some use cases we need to run before any of the C++ runtime has been
   * initialised. This singleton class is designed to not depend on the
   * runtime.
   */
  template<class Object, Object init() noexcept>
  class Singleton
  {
  public:
    /**
     * If the argument is non-null, then it is assigned the value true if
     * this is the first call to get. At most one call will be first.
     */
    inline static Object& get(bool* first = nullptr)
    {
      static std::atomic_flag flag;
      static std::atomic<bool> initialised;
      static Object obj;

      // If defined, should be initially false.
      assert(first == nullptr || *first == false);

      if (!initialised.load(std::memory_order_acquire))
      {
        FlagLock lock(flag);
        if (!initialised)
        {
          obj = init();
          initialised.store(true, std::memory_order_release);
          if (first != nullptr)
            *first = true;
        }
      }
      return obj;
    }
  };

  /**
   * Wrapper for wrapping values.
   *
   * Wraps on read. This allows code to trust the value is in range, even
   * when there is a memory corruption.
   **/
  template<size_t length, typename T>
  class Mod
  {
    static_assert(
      length == bits::next_pow2_const(length), "Must be a power of two.");

  private:
    T value;

  public:
    operator T()
    {
      return static_cast<T>(value & (length - 1));
    }

    Mod& operator=(const T v)
    {
      value = v;
      return *this;
    }
  };

  template<size_t length, typename T>
  class ModArray
  {
    static constexpr size_t rlength = bits::next_pow2_const(length);
    T array[rlength];

  public:
    constexpr const T& operator[](const size_t i) const
    {
      return array[i & (rlength - 1)];
    }

    constexpr T& operator[](const size_t i)
    {
      return array[i & (rlength - 1)];
    }
  };
} // namespace snmalloc
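A sketch of how Singleton is meant to be used (the type and names are invented): the `init` function must be `noexcept`, runs at most once even under concurrent first calls, and every later call returns the same object.

struct Config
{
  int verbosity;
};

Config make_config() noexcept
{
  return Config{1};
}

Config& global_config()
{
  // The first caller runs make_config(); everyone else waits on the
  // internal FlagLock and then sees the initialised object.
  return snmalloc::Singleton<Config, make_config>::get();
}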
@@ -1,75 +0,0 @@
#pragma once

#include "aba.h"

namespace snmalloc
{
  template<class T, Construction c = RequiresInit>
  class MPMCStack
  {
    using ABAT = ABA<T, c>;

  private:
    static_assert(
      std::is_same<decltype(T::next), std::atomic<T*>>::value,
      "T->next must be a std::atomic<T*>");

    ABAT stack;

  public:
    void push(T* item)
    {
      return push(item, item);
    }

    void push(T* first, T* last)
    {
      // Pushes an item on the stack.
      auto cmp = stack.read();

      do
      {
        T* top = ABAT::load(cmp);
        last->next.store(top, std::memory_order_release);
      } while (!stack.compare_exchange(cmp, first));
    }

    T* pop()
    {
      // Returns the next item. If the returned value is decommitted, it is
      // possible for the read of top->next to segfault.
      auto cmp = stack.read();
      T* top;
      T* next;

      do
      {
        top = ABAT::load(cmp);

        if (top == nullptr)
          break;

        next = top->next.load(std::memory_order_acquire);
      } while (!stack.compare_exchange(cmp, next));

      return top;
    }

    T* pop_all()
    {
      // Returns all items as a linked list, leaving an empty stack.
      auto cmp = stack.read();
      T* top;

      do
      {
        top = ABAT::load(cmp);

        if (top == nullptr)
          break;
      } while (!stack.compare_exchange(cmp, nullptr));

      return top;
    }
  };
} // namespace snmalloc
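As with DLList the stack is intrusive, but here `next` must be atomic, per the static assertion. A hypothetical element type and use (not in the source):

#include <atomic>

struct Chunk
{
  std::atomic<Chunk*> next;
};

// Default Construction (RequiresInit) runs init(nullptr) in the ABA ctor.
snmalloc::MPMCStack<Chunk> free_chunks;

void recycle(Chunk* c)
{
  free_chunks.push(c);
}

Chunk* grab()
{
  return free_chunks.pop(); // nullptr when the stack is empty
}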
@@ -1,82 +0,0 @@
#pragma once

#include "bits.h"
#include "helpers.h"

namespace snmalloc
{
  template<class T>
  class MPSCQ
  {
  private:
    static_assert(
      std::is_same<decltype(T::next), std::atomic<T*>>::value,
      "T->next must be a std::atomic<T*>");

    std::atomic<T*> back;
    T* front;

  public:
    void invariant()
    {
#ifndef NDEBUG
      assert(back != nullptr);
      assert(front != nullptr);
#endif
    }

    void init(T* stub)
    {
      stub->next.store(nullptr, std::memory_order_relaxed);
      front = stub;
      back.store(stub, std::memory_order_relaxed);
      invariant();
    }

    T* destroy()
    {
      T* fnt = front;
      back.store(nullptr, std::memory_order_relaxed);
      front = nullptr;
      return fnt;
    }

    inline bool is_empty()
    {
      T* bk = back.load(std::memory_order_relaxed);

      return bk == front;
    }

    void enqueue(T* first, T* last)
    {
      // Pushes a list of messages to the queue. Each message from first to
      // last should be linked together through their next pointers.
      invariant();
      last->next.store(nullptr, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_release);
      T* prev = back.exchange(last, std::memory_order_relaxed);
      prev->next.store(first, std::memory_order_relaxed);
    }

    T* dequeue()
    {
      // Returns the front message, or null if not possible to return a
      // message.
      invariant();
      T* first = front;
      T* next = first->next.load(std::memory_order_relaxed);

      if (next != nullptr)
      {
        front = next;

        assert(front);
        std::atomic_thread_fence(std::memory_order_acquire);
        invariant();
        return first;
      }

      return nullptr;
    }
  };
} // namespace snmalloc
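The queue always holds at least one node, so callers must donate a stub via `init`; `dequeue` returns null rather than ever emptying the queue completely. A sketch (names hypothetical, not in the source):

#include <atomic>

struct Message
{
  std::atomic<Message*> next;
};

snmalloc::MPSCQ<Message> inbox;
Message stub; // owned by the queue after init; carries no payload

void setup()
{
  inbox.init(&stub);
}

void send(Message* m)
{
  inbox.enqueue(m, m); // a single-element list: first == last
}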
The diff of one file is not shown because of its large size.
@@ -1,146 +0,0 @@
#pragma once

#include "../ds/bits.h"

namespace snmalloc
{
  // 0 intermediate bits results in power of 2 small allocs. 1 intermediate
  // bit gives additional sizeclasses at the midpoint between each power of
  // 2. 2 intermediate bits gives 3 intermediate sizeclasses, etc.
  static constexpr size_t INTERMEDIATE_BITS =
#ifdef USE_INTERMEDIATE_BITS
    USE_INTERMEDIATE_BITS
#else
    2
#endif
    ;

  // Return remote small allocs when the local cache reaches this size.
  static constexpr size_t REMOTE_CACHE =
#ifdef USE_REMOTE_CACHE
    USE_REMOTE_CACHE
#else
    1 << 20
#endif
    ;

  // Handle at most this many objects from the remote dealloc queue at a
  // time.
  static constexpr size_t REMOTE_BATCH =
#ifdef USE_REMOTE_BATCH
    USE_REMOTE_BATCH
#else
    64
#endif
    ;

  // Specifies smaller slab and super slab sizes for address space
  // constrained scenarios.
  static constexpr size_t ADDRESS_SPACE_CONSTRAINED =
#ifdef IS_ADDRESS_SPACE_CONSTRAINED
    true
#else
    // On 32 bit, use the smaller superslab.
    (!bits::is64())
#endif
    ;

  static constexpr size_t RESERVE_MULTIPLE =
#ifdef USE_RESERVE_MULTIPLE
    USE_RESERVE_MULTIPLE
#else
    bits::is64() ? 16 : 2
#endif
    ;

  enum DecommitStrategy
  {
    /**
     * Never decommit memory.
     */
    DecommitNone,
    /**
     * Decommit superslabs when they are entirely empty.
     */
    DecommitSuper,
    /**
     * Decommit all slabs once they are empty.
     */
    DecommitAll,
    /**
     * Decommit superslabs only when we are informed of memory pressure by
     * the OS, do not decommit anything in normal operation.
     */
    DecommitSuperLazy
  };

  static constexpr DecommitStrategy decommit_strategy =
#ifdef USE_DECOMMIT_STRATEGY
    USE_DECOMMIT_STRATEGY
#elif defined(_WIN32) && !defined(OPEN_ENCLAVE)
    DecommitSuperLazy
#else
    DecommitSuper
#endif
    ;

  // The remaining values are derived, not configurable.
  static constexpr size_t POINTER_BITS =
    bits::next_pow2_bits_const(sizeof(uintptr_t));

  // Used to isolate values on cache lines to prevent false sharing.
  static constexpr size_t CACHELINE_SIZE = 64;

  // Used to keep Superslab metadata committed.
  static constexpr size_t OS_PAGE_SIZE = 0x1000;
  static constexpr size_t PAGE_ALIGNED_SIZE = OS_PAGE_SIZE << INTERMEDIATE_BITS;
// Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h)
// define `PAGE_SIZE` as a macro. We don't use `PAGE_SIZE` as our variable
// name, to avoid conflicts, but if we do see a macro definition then check
// that our value matches the platform's expected value.
#ifdef PAGE_SIZE
  static_assert(
    PAGE_SIZE == OS_PAGE_SIZE,
    "Page size from system header does not match snmalloc config page size.");
#endif

  // Minimum allocation size is space for two pointers.
  static constexpr size_t MIN_ALLOC_BITS = bits::is64() ? 4 : 3;
  static constexpr size_t MIN_ALLOC_SIZE = 1 << MIN_ALLOC_BITS;

  // Slabs are 64 KiB (16 KiB when address-space constrained).
  static constexpr size_t SLAB_BITS = ADDRESS_SPACE_CONSTRAINED ? 14 : 16;
  static constexpr size_t SLAB_SIZE = 1 << SLAB_BITS;
  static constexpr size_t SLAB_MASK = ~(SLAB_SIZE - 1);

  // Superslabs are composed of this many slabs. Slab offsets are encoded as
  // a byte, so the maximum count is 256. This must be a power of two to
  // allow fast masking to find a superslab start address.
  static constexpr size_t SLAB_COUNT_BITS = ADDRESS_SPACE_CONSTRAINED ? 6 : 8;
  static constexpr size_t SLAB_COUNT = 1 << SLAB_COUNT_BITS;
  static constexpr size_t SUPERSLAB_SIZE = SLAB_SIZE * SLAB_COUNT;
  static constexpr size_t SUPERSLAB_MASK = ~(SUPERSLAB_SIZE - 1);
  static constexpr size_t SUPERSLAB_BITS = SLAB_BITS + SLAB_COUNT_BITS;
  static constexpr size_t RESERVE_SIZE = SUPERSLAB_SIZE * RESERVE_MULTIPLE;

  static_assert((1ULL << SUPERSLAB_BITS) == SUPERSLAB_SIZE, "Sanity check");

  // Number of slots for remote deallocation.
  static constexpr size_t REMOTE_SLOT_BITS = 6;
  static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS;
  static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1;

  static_assert(
    INTERMEDIATE_BITS < MIN_ALLOC_BITS,
    "INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS");
  static_assert(
    MIN_ALLOC_SIZE >= (sizeof(void*) * 2),
    "MIN_ALLOC_SIZE must be sufficient for two pointers");
  static_assert(
    SLAB_BITS <= (sizeof(uint16_t) * 8),
    "SLAB_BITS must not be more than the bits in a uint16_t");
  static_assert(
    SLAB_COUNT == bits::next_pow2_const(SLAB_COUNT),
    "SLAB_COUNT must be a power of 2");
  static_assert(
    SLAB_COUNT <= (UINT8_MAX + 1), "SLAB_COUNT must fit in a uint8_t");
} // namespace snmalloc
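To make the derived constants concrete, an illustrative check (not in the file) for the default 64-bit, non-address-space-constrained configuration:

// SLAB_BITS = 16 and SLAB_COUNT_BITS = 8 in this configuration.
static_assert(snmalloc::SLAB_SIZE == (1 << 16), "64 KiB slabs");
static_assert(
  snmalloc::SUPERSLAB_SIZE == (1 << 24), "256 slabs = 16 MiB superslabs");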
@@ -1,19 +0,0 @@
#pragma once

#include "../mem/baseslab.h"
#include "remoteallocator.h"

namespace snmalloc
{
  class Allocslab : public Baseslab
  {
  protected:
    RemoteAllocator* allocator;

  public:
    RemoteAllocator* get_allocator()
    {
      return allocator;
    }
  };
} // namespace snmalloc
@@ -1,397 +0,0 @@
#pragma once

#include "../ds/bits.h"

#include <cstdint>

#ifdef USE_SNMALLOC_STATS
#  include "../ds/csv.h"
#  include "sizeclass.h"

#  include <cstring>
#  include <iostream>
#endif

namespace snmalloc
{
  template<size_t N, size_t LARGE_N>
  struct AllocStats
  {
    struct CurrentMaxPair
    {
      size_t current = 0;
      size_t max = 0;
      size_t used = 0;

      void inc()
      {
        current++;
        used++;
        if (current > max)
          max++;
      }

      void dec()
      {
        current--;
      }

      bool is_empty()
      {
        return current == 0;
      }

      bool is_unused()
      {
        return max == 0;
      }

      void add(CurrentMaxPair& that)
      {
        current += that.current;
        max += that.max;
        used += that.used;
      }
#ifdef USE_SNMALLOC_STATS
      void print(CSVStream& csv, size_t multiplier = 1)
      {
        csv << current * multiplier << max * multiplier << used * multiplier;
      }
#endif
    };

    struct Stats
    {
      CurrentMaxPair count;
      CurrentMaxPair slab_count;
      uint64_t time = bits::tick();
      uint64_t ticks = 0;
      double online_average = 0;

      bool is_empty()
      {
        return count.is_empty();
      }

      void add(Stats& that)
      {
        count.add(that.count);
        slab_count.add(that.slab_count);
      }

      void addToRunningAverage()
      {
        uint64_t now = bits::tick();

        if (slab_count.current != 0)
        {
          double occupancy = static_cast<double>(count.current) /
            static_cast<double>(slab_count.current);
          uint64_t duration = now - time;

          if (ticks == 0)
            online_average = occupancy;
          else
            online_average += ((occupancy - online_average) * duration) / ticks;

          ticks += duration;
        }

        time = now;
      }

#ifdef USE_SNMALLOC_STATS
      void
      print(CSVStream& csv, size_t multiplier = 1, size_t slab_multiplier = 1)
      {
        // Keep in sync with header lower down
        count.print(csv, multiplier);
        slab_count.print(csv, slab_multiplier);
        size_t average = static_cast<size_t>(online_average * multiplier);

        csv << average << (slab_multiplier - average) * slab_count.max
            << csv.endl;
      }
#endif
    };

#ifdef USE_SNMALLOC_STATS
    static constexpr size_t BUCKETS_BITS = 4;
    static constexpr size_t BUCKETS = 1 << BUCKETS_BITS;
    static constexpr size_t TOTAL_BUCKETS =
      bits::to_exp_mant_const<BUCKETS_BITS>(
        bits::one_at_bit(bits::ADDRESS_BITS - 1));

    Stats sizeclass[N];
    Stats large[LARGE_N];

    size_t remote_freed = 0;
    size_t remote_posted = 0;
    size_t remote_received = 0;
    size_t superslab_push_count = 0;
    size_t superslab_pop_count = 0;
    size_t superslab_fresh_count = 0;
    size_t segment_count = 0;
    size_t bucketed_requests[TOTAL_BUCKETS] = {};
#endif

    void alloc_request(size_t size)
    {
      UNUSED(size);

#ifdef USE_SNMALLOC_STATS
      bucketed_requests[bits::to_exp_mant<BUCKETS_BITS>(size)]++;
#endif
    }

    bool is_empty()
    {
#ifdef USE_SNMALLOC_STATS
      for (size_t i = 0; i < N; i++)
      {
        if (!sizeclass[i].is_empty())
          return false;
      }

      for (size_t i = 0; i < LARGE_N; i++)
      {
        if (!large[i].is_empty())
          return false;
      }

      return (remote_freed == remote_posted);
#else
      return true;
#endif
    }

    void sizeclass_alloc(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      sizeclass[sc].addToRunningAverage();
      sizeclass[sc].count.inc();
#endif
    }

    void sizeclass_dealloc(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      sizeclass[sc].addToRunningAverage();
      sizeclass[sc].count.dec();
#endif
    }

    void large_alloc(size_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      large[sc].count.inc();
#endif
    }

    void sizeclass_alloc_slab(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      sizeclass[sc].addToRunningAverage();
      sizeclass[sc].slab_count.inc();
#endif
    }

    void sizeclass_dealloc_slab(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      sizeclass[sc].addToRunningAverage();
      sizeclass[sc].slab_count.dec();
#endif
    }

    void large_dealloc(size_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      large[sc].count.dec();
#endif
    }

    void segment_create()
    {
#ifdef USE_SNMALLOC_STATS
      segment_count++;
#endif
    }

    void superslab_pop()
    {
#ifdef USE_SNMALLOC_STATS
      superslab_pop_count++;
#endif
    }

    void superslab_push()
    {
#ifdef USE_SNMALLOC_STATS
      superslab_push_count++;
#endif
    }

    void superslab_fresh()
    {
#ifdef USE_SNMALLOC_STATS
      superslab_fresh_count++;
#endif
    }

    void remote_free(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      remote_freed += sizeclass_to_size(sc);
#endif
    }

    void remote_post()
    {
#ifdef USE_SNMALLOC_STATS
      remote_posted = remote_freed;
#endif
    }

    void remote_receive(uint8_t sc)
    {
      UNUSED(sc);

#ifdef USE_SNMALLOC_STATS
      remote_received += sizeclass_to_size(sc);
#endif
    }

    void add(AllocStats<N, LARGE_N>& that)
    {
      UNUSED(that);

#ifdef USE_SNMALLOC_STATS
      for (size_t i = 0; i < N; i++)
        sizeclass[i].add(that.sizeclass[i]);

      for (size_t i = 0; i < LARGE_N; i++)
        large[i].add(that.large[i]);

      for (size_t i = 0; i < TOTAL_BUCKETS; i++)
        bucketed_requests[i] += that.bucketed_requests[i];

      remote_freed += that.remote_freed;
      remote_posted += that.remote_posted;
      remote_received += that.remote_received;
      superslab_pop_count += that.superslab_pop_count;
      superslab_push_count += that.superslab_push_count;
      superslab_fresh_count += that.superslab_fresh_count;
      segment_count += that.segment_count;
#endif
    }

#ifdef USE_SNMALLOC_STATS
    template<class Alloc>
    void print(std::ostream& o, uint64_t dumpid = 0, uint64_t allocatorid = 0)
    {
      UNUSED(o);
      UNUSED(dumpid);
      UNUSED(allocatorid);

      CSVStream csv(&o);

      if (dumpid == 0)
      {
        // Output headers for initial dump
        // Keep in sync with data dump
        csv << "GlobalStats"
            << "DumpID"
            << "AllocatorID"
            << "Remote freed"
            << "Remote posted"
            << "Remote received"
            << "Superslab pop"
            << "Superslab push"
            << "Superslab fresh"
            << "Segments" << csv.endl;

        csv << "BucketedStats"
            << "DumpID"
            << "AllocatorID"
            << "Size group"
            << "Size"
            << "Current count"
            << "Max count"
            << "Total Allocs"
            << "Current Slab bytes"
            << "Max Slab bytes"
            << "Total slab allocs"
            << "Average Slab Usage"
            << "Average wasted space" << csv.endl;

        csv << "AllocSizes"
            << "DumpID"
            << "AllocatorID"
            << "ClassID"
            << "Low size"
            << "High size"
            << "Count" << csv.endl;
      }

      for (uint8_t i = 0; i < N; i++)
      {
        if (sizeclass[i].count.is_unused())
          continue;

        sizeclass[i].addToRunningAverage();

        csv << "BucketedStats" << dumpid << allocatorid << i
            << sizeclass_to_size(i);

        sizeclass[i].print(csv, sizeclass_to_size(i));
      }

      for (uint8_t i = 0; i < LARGE_N; i++)
      {
        if (large[i].count.is_unused())
          continue;

        csv << "BucketedStats" << dumpid << allocatorid << (i + N)
            << large_sizeclass_to_size(i);

        large[i].print(csv, large_sizeclass_to_size(i));
      }

      size_t low = 0;
      size_t high = 0;

      for (size_t i = 0; i < TOTAL_BUCKETS; i++)
      {
        low = high + 1;
        high = bits::from_exp_mant<BUCKETS_BITS>(i);

        if (bucketed_requests[i] == 0)
          continue;

        csv << "AllocSizes" << dumpid << allocatorid << i << low << high
            << bucketed_requests[i] << csv.endl;
      }

      csv << "GlobalStats" << dumpid << allocatorid << remote_freed
          << remote_posted << remote_received << superslab_pop_count
          << superslab_push_count << superslab_fresh_count << segment_count
          << csv.endl;
    }
#endif
  };
} // namespace snmalloc
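The counter semantics in CurrentMaxPair: `current` tracks live objects, `max` the high-water mark, and `used` a monotone total of everything ever allocated. A toy illustration (not in the file):

void counter_demo()
{
  snmalloc::AllocStats<1, 1>::CurrentMaxPair p;
  p.inc(); // current = 1, max = 1, used = 1
  p.inc(); // current = 2, max = 2, used = 2
  p.dec(); // current = 1, max = 2, used = 2
}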
@@ -1,32 +0,0 @@
#pragma once

#include "../ds/mpmcstack.h"
#include "allocconfig.h"

namespace snmalloc
{
  enum SlabKind
  {
    Fresh = 0,
    Large,
    Medium,
    Super,
    /**
     * If the decommit policy is lazy, slabs are moved to this state when
     * all pages other than the first one have been decommitted.
     */
    Decommitted
  };

  class Baseslab
  {
  protected:
    SlabKind kind;

  public:
    SlabKind get_kind()
    {
      return kind;
    }
  };
} // namespace snmalloc
@ -1,179 +0,0 @@
#pragma once

#include "../ds/helpers.h"
#include "alloc.h"
#include "pool.h"

namespace snmalloc
{
  template<class MemoryProvider>
  class AllocPool : Pool<Allocator<MemoryProvider>, MemoryProvider>
  {
    using Alloc = Allocator<MemoryProvider>;
    using Parent = Pool<Allocator<MemoryProvider>, MemoryProvider>;

  public:
    static AllocPool* make(MemoryProvider& mp)
    {
      static_assert(
        sizeof(AllocPool) == sizeof(Parent),
        "You cannot add fields to this class.");
      // This cast is safe due to the static assert.
      return static_cast<AllocPool*>(Parent::make(mp));
    }

    static AllocPool* make() noexcept
    {
      return make(default_memory_provider);
    }

    Alloc* acquire()
    {
      return Parent::acquire(Parent::memory_provider);
    }

    void release(Alloc* a)
    {
      Parent::release(a);
    }

  public:
    void aggregate_stats(Stats& stats)
    {
      auto* alloc = Parent::iterate();

      while (alloc != nullptr)
      {
        stats.add(alloc->stats());
        alloc = Parent::iterate(alloc);
      }
    }

#ifdef USE_SNMALLOC_STATS
    void print_all_stats(std::ostream& o, uint64_t dumpid = 0)
    {
      auto alloc = Parent::iterate();

      while (alloc != nullptr)
      {
        alloc->stats().template print<Alloc>(o, dumpid, alloc->id());
        alloc = Parent::iterate(alloc);
      }
    }
#else
    void print_all_stats(void*& o, uint64_t dumpid = 0)
    {
      UNUSED(o);
      UNUSED(dumpid);
    }
#endif

    void cleanup_unused()
    {
#ifndef USE_MALLOC
      // Call this periodically to free and coalesce memory allocated by
      // allocators that are not currently in use by any thread.
      // One atomic operation to extract the stack, another to restore it.
      // Handling the message queue for each stack is non-atomic.
      auto* first = Parent::extract();
      auto* alloc = first;
      decltype(alloc) last;

      if (alloc != nullptr)
      {
        while (alloc != nullptr)
        {
          alloc->handle_message_queue();
          last = alloc;
          alloc = Parent::extract(alloc);
        }

        restore(first, last);
      }
#endif
    }

    void debug_check_empty()
    {
#ifndef USE_MALLOC
      // This is a debugging function. It checks that all memory from all
      // allocators has been freed.
      size_t alloc_count = 0;

      auto* alloc = Parent::iterate();

      // Count the linked allocators.
      while (alloc != nullptr)
      {
        alloc = Parent::iterate(alloc);
        alloc_count++;
      }

      bool done = false;

      while (!done)
      {
        done = true;
        alloc = Parent::iterate();

        while (alloc != nullptr)
        {
          // Destroy the message queue so that it has no stub message.
          Remote* p = alloc->message_queue().destroy();

          while (p != nullptr)
          {
            Remote* next = p->non_atomic_next;
            alloc->handle_dealloc_remote(p);
            p = next;
          }

          // Place the static stub message on the queue.
          alloc->init_message_queue();

          // Post all remotes, including forwarded ones. If any allocator
          // posts, repeat the loop.
          if (alloc->remote.size > 0)
          {
            alloc->stats().remote_post();
            alloc->remote.post(alloc->id());
            done = false;
          }

          alloc = Parent::iterate(alloc);
        }
      }

      alloc = Parent::iterate();
      size_t empty_count = 0;

      while (alloc != nullptr)
      {
        // Check that the allocator has freed all memory.
        if (alloc->stats().is_empty())
          empty_count++;

        alloc = Parent::iterate(alloc);
      }

      if (alloc_count != empty_count)
        error("Incorrect number of allocators");
#endif
    }
  };

  inline AllocPool<GlobalVirtual>*& current_alloc_pool()
  {
    return Singleton<
      AllocPool<GlobalVirtual>*,
      AllocPool<GlobalVirtual>::make>::get();
  }

  template<class MemoryProvider>
  inline AllocPool<MemoryProvider>* make_alloc_pool(MemoryProvider& mp)
  {
    return AllocPool<MemoryProvider>::make(mp);
  }

  using Alloc = Allocator<GlobalVirtual>;
} // namespace snmalloc

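// A minimal usage sketch for the pool above (not from the original tree):
// `use_pool` is a hypothetical caller, and it assumes `Alloc` exposes
// `alloc`/`dealloc` as defined in alloc.h.
//
//   void use_pool()
//   {
//     auto* pool = snmalloc::current_alloc_pool();
//     snmalloc::Alloc* a = pool->acquire(); // take an allocator from the pool
//     void* p = a->alloc(128);              // allocate through it
//     a->dealloc(p);
//     pool->release(a);                     // hand it back for reuse
//   }
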
@ -1,418 +0,0 @@
#pragma once

#include "../ds/flaglock.h"
#include "../ds/helpers.h"
#include "../ds/mpmcstack.h"
#include "../pal/pal.h"
#include "allocstats.h"
#include "baseslab.h"
#include "sizeclass.h"

#include <new>

namespace snmalloc
{
  template<class PAL>
  class MemoryProviderStateMixin;

  class Largeslab : public Baseslab
  {
    // This is the view of a contiguous memory area when it is being kept
    // in the global size-classed caches of available contiguous memory areas.
  private:
    template<class a, Construction c>
    friend class MPMCStack;
    template<class PAL>
    friend class MemoryProviderStateMixin;
    std::atomic<Largeslab*> next;

  public:
    void init()
    {
      kind = Large;
    }
  };

  /**
   * A slab that has been decommitted. The first page remains committed and
   * the only fields that are guaranteed to exist are the kind and next
   * pointer from the superclass.
   */
  struct Decommittedslab : public Largeslab
  {
    /**
     * Constructor. Expected to be called via placement new into some memory
     * that was formerly a superslab or large allocation and is now just some
     * spare address space.
     */
    Decommittedslab()
    {
      kind = Decommitted;
    }
  };

  // This represents the state that the large allocator needs to add to the
  // global state of the allocator. This is currently stored in the memory
  // provider, so we add this in.
  template<class PAL>
  class MemoryProviderStateMixin : public PAL
  {
    std::atomic_flag lock = ATOMIC_FLAG_INIT;
    address_t bump;
    size_t remaining;

    void new_block()
    {
      size_t size = SUPERSLAB_SIZE;
      void* r = reserve<false>(&size, SUPERSLAB_SIZE);

      if (size < SUPERSLAB_SIZE)
        error("out of memory");

      PAL::template notify_using<NoZero>(r, OS_PAGE_SIZE);

      bump = address_cast(r);
      remaining = size;
    }

    /**
     * The last time we saw a low memory notification.
     */
    std::atomic<uint64_t> last_low_memory_epoch = 0;
    std::atomic_flag lazy_decommit_guard;
    void lazy_decommit()
    {
      // If another thread is already doing lazy decommit, let it continue.
      // If we try to parallelise this, we'll most likely end up waiting on
      // the same page table locks.
      if (lazy_decommit_guard.test_and_set())
      {
        return;
      }
      // When we hit low memory, iterate over size classes and decommit all
      // of the memory that we can. Start with the small size classes so that
      // we hit cached superslabs first.
      // FIXME: We probably shouldn't do this all at once.
      for (size_t large_class = 0; large_class < NUM_LARGE_CLASSES;
           large_class++)
      {
        if (!PAL::expensive_low_memory_check())
        {
          break;
        }
        size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
        size_t decommit_size = rsize - OS_PAGE_SIZE;
        // Grab all of the chunks of this size class.
        auto* slab = large_stack[large_class].pop_all();
        while (slab)
        {
          // Decommit all except for the first page and then put it back on
          // the stack.
          if (slab->get_kind() != Decommitted)
          {
            PAL::notify_not_using(
              pointer_offset(slab, OS_PAGE_SIZE), decommit_size);
          }
          // Once we've removed these from the stack, there will be no
          // concurrent accesses and removal should have established a
          // happens-before relationship, so it's safe to use relaxed loads
          // here.
          auto next = slab->next.load(std::memory_order_relaxed);
          large_stack[large_class].push(new (slab) Decommittedslab());
          slab = next;
        }
      }
      lazy_decommit_guard.clear();
    }

  public:
    /**
     * Stack of large allocations that have been returned for reuse.
     */
    ModArray<NUM_LARGE_CLASSES, MPMCStack<Largeslab, PreZeroed>> large_stack;

    /**
     * Primitive allocator for structures that are required before
     * the allocator itself can be running.
     */
    template<typename T, size_t alignment, typename... Args>
    T* alloc_chunk(Args&&... args)
    {
      // Cache line align
      size_t size = bits::align_up(sizeof(T), 64);

      void* p;
      {
        FlagLock f(lock);

        auto aligned_bump = bits::align_up(bump, alignment);
        if ((aligned_bump - bump) > remaining)
        {
          new_block();
        }
        else
        {
          remaining -= aligned_bump - bump;
          bump = aligned_bump;
        }

        if (remaining < size)
        {
          new_block();
        }

        p = pointer_cast<void>(bump);
        bump += size;
        remaining -= size;
      }

      auto page_start = bits::align_down(address_cast(p), OS_PAGE_SIZE);
      auto page_end = bits::align_up(address_cast(p) + size, OS_PAGE_SIZE);

      PAL::template notify_using<NoZero>(
        pointer_cast<void>(page_start), page_end - page_start);

      return new (p) T(std::forward<Args>(args)...);
    }

    /**
     * Query whether the PAL supports a specific feature.
     */
    template<PalFeatures F>
    constexpr static bool pal_supports()
    {
      return (PAL::pal_features & F) == F;
    }

    /**
     * Returns the number of low memory notifications that have been received
     * (over the lifetime of this process). If the underlying system does not
     * support low memory notifications, this will return 0.
     */
    ALWAYSINLINE
    uint64_t low_memory_epoch()
    {
      if constexpr (pal_supports<LowMemoryNotification>())
      {
        return PAL::low_memory_epoch();
      }
      else
      {
        return 0;
      }
    }

    template<bool committed>
    void* reserve(size_t* size, size_t align) noexcept
    {
      if constexpr (pal_supports<AlignedAllocation>())
      {
        return PAL::template reserve<committed>(size, align);
      }
      else
      {
        size_t request = *size;
        // Add align, so we can guarantee to provide at least size.
        request += align;
        // Alignment must be a power of 2.
        assert(align == bits::next_pow2(align));

        void* p = PAL::template reserve<committed>(&request);

        *size = request;
        auto p0 = address_cast(p);
        auto start = bits::align_up(p0, align);

        if (start > p0)
        {
          uintptr_t end = bits::align_down(p0 + request, align);
          *size = end - start;
          PAL::notify_not_using(p, start - p0);
          PAL::notify_not_using(pointer_cast<void>(end), (p0 + request) - end);
          p = pointer_cast<void>(start);
        }
        return p;
      }
    }
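
    // Worked example for the unaligned fallback above (illustrative numbers,
    // not from the original source). With *size = align = 16MiB (0x1000000),
    // we request 32MiB. If the OS returns p0 = 0x10080000:
    //   start = align_up(p0, 16MiB)           = 0x11000000
    //   end   = align_down(p0 + 32MiB, 16MiB) = 0x12000000
    //   *size = end - start                   = 16MiB
    // and the untouched head (0xF80000 bytes) and tail (0x80000 bytes) are
    // returned to the PAL via notify_not_using.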

    ALWAYSINLINE void lazy_decommit_if_needed()
    {
#ifdef TEST_LAZY_DECOMMIT
      static_assert(
        TEST_LAZY_DECOMMIT > 0,
        "TEST_LAZY_DECOMMIT must be a positive integer value.");
      static std::atomic<uint64_t> counter;
      auto c = counter++;
      if (c % TEST_LAZY_DECOMMIT == 0)
      {
        lazy_decommit();
      }
#else
      if constexpr (decommit_strategy == DecommitSuperLazy)
      {
        auto new_epoch = low_memory_epoch();
        auto old_epoch = last_low_memory_epoch.load(std::memory_order_acquire);
        if (new_epoch > old_epoch)
        {
          // Try to update the epoch to the value that we've seen. If
          // another thread has seen a newer epoch than us (or done the same
          // update) let them win.
          do
          {
            if (last_low_memory_epoch.compare_exchange_strong(
                  old_epoch, new_epoch))
            {
              lazy_decommit();
            }
          } while (old_epoch <= new_epoch);
        }
      }
#endif
    }
  };

  using Stats = AllocStats<NUM_SIZECLASSES, NUM_LARGE_CLASSES>;

  enum AllowReserve
  {
    NoReserve,
    YesReserve
  };

  template<class MemoryProvider>
  class LargeAlloc
  {
    void* reserved_start = nullptr;
    void* reserved_end = nullptr;

  public:
    // This will be a zero-size structure if stats are not enabled.
    Stats stats;

    MemoryProvider& memory_provider;

    LargeAlloc(MemoryProvider& mp) : memory_provider(mp) {}

    template<AllowReserve allow_reserve>
    bool reserve_memory(size_t need, size_t add)
    {
      if ((address_cast(reserved_start) + need) > address_cast(reserved_end))
      {
        if constexpr (allow_reserve == YesReserve)
        {
          stats.segment_create();
          reserved_start =
            memory_provider.template reserve<false>(&add, SUPERSLAB_SIZE);
          reserved_end = pointer_offset(reserved_start, add);
          reserved_start = pointer_cast<void>(
            bits::align_up(address_cast(reserved_start), SUPERSLAB_SIZE));

          if (add < need)
            return false;
        }
        else
        {
          return false;
        }
      }

      return true;
    }

    template<ZeroMem zero_mem = NoZero, AllowReserve allow_reserve = YesReserve>
    void* alloc(size_t large_class, size_t size)
    {
      size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
      if (size == 0)
        size = rsize;

      void* p = memory_provider.large_stack[large_class].pop();
      memory_provider.lazy_decommit_if_needed();

      if (p == nullptr)
      {
        assert(reserved_start <= reserved_end);
        size_t add;

        if ((rsize + SUPERSLAB_SIZE) < RESERVE_SIZE)
          add = RESERVE_SIZE;
        else
          add = rsize + SUPERSLAB_SIZE;

        if (!reserve_memory<allow_reserve>(rsize, add))
          return nullptr;

        p = reserved_start;
        reserved_start = pointer_offset(p, rsize);

        stats.superslab_fresh();
        // All memory is zeroed since it comes from reserved space.
        memory_provider.template notify_using<NoZero>(p, size);
      }
      else
      {
        stats.superslab_pop();

        if constexpr (decommit_strategy == DecommitSuperLazy)
        {
          if (static_cast<Baseslab*>(p)->get_kind() == Decommitted)
          {
            // The first page is already in "use" for the stack element,
            // this will need zeroing for a YesZero call.
            if constexpr (zero_mem == YesZero)
              memory_provider.template zero<true>(p, OS_PAGE_SIZE);

            // Notify we are using the rest of the allocation.
            // Passing zero_mem ensures the PAL provides zeroed pages if
            // required.
            memory_provider.template notify_using<zero_mem>(
              pointer_offset(p, OS_PAGE_SIZE),
              bits::align_up(size, OS_PAGE_SIZE) - OS_PAGE_SIZE);
          }
          else
          {
            if constexpr (zero_mem == YesZero)
              memory_provider.template zero<true>(
                p, bits::align_up(size, OS_PAGE_SIZE));
          }
        }
        else if ((decommit_strategy != DecommitNone) || (large_class > 0))
        {
          // The first page is already in "use" for the stack element,
          // this will need zeroing for a YesZero call.
          if constexpr (zero_mem == YesZero)
            memory_provider.template zero<true>(p, OS_PAGE_SIZE);

          // Notify we are using the rest of the allocation.
          // Passing zero_mem ensures the PAL provides zeroed pages if required.
          memory_provider.template notify_using<zero_mem>(
            pointer_offset(p, OS_PAGE_SIZE),
            bits::align_up(size, OS_PAGE_SIZE) - OS_PAGE_SIZE);
        }
        else
        {
          // This is a superslab that has not been decommitted.
          if constexpr (zero_mem == YesZero)
            memory_provider.template zero<true>(
              p, bits::align_up(size, OS_PAGE_SIZE));
        }
      }

      return p;
    }

    void dealloc(void* p, size_t large_class)
    {
      stats.superslab_push();
      memory_provider.large_stack[large_class].push(static_cast<Largeslab*>(p));
      memory_provider.lazy_decommit_if_needed();
    }
  };

  using GlobalVirtual = MemoryProviderStateMixin<Pal>;
  /**
   * The memory provider that will be used if no other provider is explicitly
   * passed as an argument.
   */
  HEADER_GLOBAL GlobalVirtual default_memory_provider;
} // namespace snmalloc

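// Sizing sketch for the large allocator above (not from the original
// source): each large_class doubles the allocation size, starting at one
// superslab. With SUPERSLAB_BITS == 24, for example:
//   large_class 0 -> 16MiB, large_class 1 -> 32MiB, large_class 2 -> 64MiB
// so `rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class` in
// LargeAlloc::alloc picks the stack of free chunks of exactly that size.
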
@ -1,132 +0,0 @@
#pragma once

#include "../ds/dllist.h"
#include "allocconfig.h"
#include "allocslab.h"
#include "sizeclass.h"

namespace snmalloc
{
  class Mediumslab : public Allocslab
  {
    // This is the view of a 16 mb area when it is being used to allocate
    // medium sized classes: 64 kb to 16 mb, non-inclusive.
  private:
    friend DLList<Mediumslab>;

    // Keep the allocator pointer on a separate cache line. It is read by
    // other threads, and does not change, so we avoid false sharing.
    alignas(CACHELINE_SIZE) Mediumslab* next;
    Mediumslab* prev;

    uint16_t free;
    uint8_t head;
    uint8_t sizeclass;
    uint16_t stack[SLAB_COUNT - 1];

  public:
    static constexpr uint32_t header_size()
    {
      static_assert(
        sizeof(Mediumslab) < OS_PAGE_SIZE,
        "Mediumslab header size must be less than the page size");
      static_assert(
        sizeof(Mediumslab) < SLAB_SIZE,
        "Mediumslab header size must be less than the slab size");

      // Always use a full page as the header, in order to get page sized
      // alignment of individual allocations.
      return OS_PAGE_SIZE;
    }

    static Mediumslab* get(void* p)
    {
      return pointer_cast<Mediumslab>(address_cast(p) & SUPERSLAB_MASK);
    }

    void init(RemoteAllocator* alloc, uint8_t sc, size_t rsize)
    {
      assert(sc >= NUM_SMALL_CLASSES);
      assert((sc - NUM_SMALL_CLASSES) < NUM_MEDIUM_CLASSES);

      allocator = alloc;
      head = 0;

      // If this was previously a Mediumslab of the same sizeclass, don't
      // initialise the allocation stack.
      if ((kind != Medium) || (sizeclass != sc))
      {
        sizeclass = sc;
        uint16_t ssize = static_cast<uint16_t>(rsize >> 8);
        kind = Medium;
        free = medium_slab_free(sc);
        for (uint16_t i = free; i > 0; i--)
          stack[free - i] =
            static_cast<uint16_t>((SUPERSLAB_SIZE >> 8) - (i * ssize));
      }
      else
      {
        assert(free == medium_slab_free(sc));
      }
    }

    uint8_t get_sizeclass()
    {
      return sizeclass;
    }

    template<ZeroMem zero_mem, typename MemoryProvider>
    void* alloc(size_t size, MemoryProvider& memory_provider)
    {
      assert(!full());

      uint16_t index = stack[head++];
      void* p = pointer_offset(this, (static_cast<size_t>(index) << 8));
      free--;

      assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, OS_PAGE_SIZE));
      size = bits::align_up(size, OS_PAGE_SIZE);

      if constexpr (decommit_strategy == DecommitAll)
        memory_provider.template notify_using<zero_mem>(p, size);
      else if constexpr (zero_mem == YesZero)
        memory_provider.template zero<true>(p, size);

      return p;
    }

    template<typename MemoryProvider>
    bool dealloc(void* p, MemoryProvider& memory_provider)
    {
      assert(head > 0);

      // Returns true if the Mediumslab was full before this deallocation.
      bool was_full = full();
      free++;
      stack[--head] = pointer_to_index(p);

      if constexpr (decommit_strategy == DecommitAll)
        memory_provider.notify_not_using(p, sizeclass_to_size(sizeclass));

      return was_full;
    }

    bool full()
    {
      return free == 0;
    }

    bool empty()
    {
      return head == 0;
    }

  private:
    uint16_t pointer_to_index(void* p)
    {
      // Get the offset from the slab for a memory location.
      return static_cast<uint16_t>((address_cast(p) - address_cast(this)) >> 8);
    }
  };
} // namespace snmalloc

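// Encoding sketch for the medium slab's free stack (not from the original
// source): entries are offsets from the slab base in 256-byte (1 << 8)
// granules, which is why init stores `(SUPERSLAB_SIZE >> 8) - (i * ssize)`
// and alloc rebuilds the pointer with `index << 8`. For instance, with a
// 128KiB object (ssize = 512) in a 16MiB slab, the last object sits at
// stack entry 65536 - 512 = 65024, i.e. at offset 16MiB - 128KiB.
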
@ -1,169 +0,0 @@
#pragma once

#include "../ds/dllist.h"
#include "../ds/helpers.h"
#include "sizeclass.h"

namespace snmalloc
{
  class Slab;

  struct SlabLink
  {
    SlabLink* prev;
    SlabLink* next;

    Slab* get_slab()
    {
      return pointer_cast<Slab>(address_cast(this) & SLAB_MASK);
    }
  };

  using SlabList = DLList<SlabLink>;

  static_assert(
    sizeof(SlabLink) <= MIN_ALLOC_SIZE,
    "Need to be able to pack a SlabLink into any free small alloc");

  // The Metaslab represents the status of a single slab.
  // This can be either a short or a standard slab.
  class Metaslab
  {
  private:
    // How many entries are used in this slab.
    uint16_t used = 0;

  public:
    // Bump free list of unused entries in this sizeclass.
    // If the bottom bit is 1, then this represents a bump_ptr
    // of where we have allocated up to in this slab. Otherwise,
    // it represents the location of the first block in the free
    // list. The free list is chained through deallocated blocks.
    // It is terminated with a bump ptr.
    //
    // Note that the first entry in a slab is never bump allocated
    // but is used for the link. This means that 1 represents the fully
    // bump-allocated slab.
    Mod<SLAB_SIZE, uint16_t> head;
    // When a slab has free space it will be on the "has space" list for
    // that size class. We use an empty block in this slab to be the
    // doubly linked node into that size class's free list.
    Mod<SLAB_SIZE, uint16_t> link;

    uint8_t sizeclass;
    // Initially zero to encode the superslab's relative list of slabs.
    uint8_t next = 0;

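    // Encoding sketch for `head` (illustrative numbers, not from the
    // original source): for a 16-byte sizeclass whose link sits at offset 0,
    // a fresh slab starts with head = 16 + 1 = 17, i.e. bump pointer 16 with
    // the bottom bit set. Deallocating the block at offset 48 then stores
    // the old head (17) into that block and sets head = 48, an even value,
    // so the next alloc pops the free list instead of bumping.
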
    void add_use()
    {
      used++;
    }

    void sub_use()
    {
      used--;
    }

    void set_unused()
    {
      used = 0;
    }

    bool is_unused()
    {
      return used == 0;
    }

    bool is_full()
    {
      return link == 1;
    }

    void set_full()
    {
      assert(head == 1);
      assert(link != 1);
      link = 1;
    }

    SlabLink* get_link(Slab* slab)
    {
      return reinterpret_cast<SlabLink*>(pointer_offset(slab, link));
    }

    bool valid_head(bool is_short)
    {
      size_t size = sizeclass_to_size(sizeclass);
      size_t slab_start = get_initial_link(sizeclass, is_short);
      size_t all_high_bits = ~static_cast<size_t>(1);

      size_t head_start =
        remove_cache_friendly_offset(head & all_high_bits, sizeclass);

      return ((head_start - slab_start) % size) == 0;
    }

    void debug_slab_invariant(bool is_short, Slab* slab)
    {
#if !defined(NDEBUG) && !defined(SNMALLOC_CHEAP_CHECKS)
      size_t size = sizeclass_to_size(sizeclass);
      size_t offset = get_initial_link(sizeclass, is_short);

      size_t accounted_for = used * size + offset;

      if (is_full())
      {
        // All the blocks must be used.
        assert(SLAB_SIZE == accounted_for);
        // There is no free list to validate;
        // the 'link' value is not important if full.
        return;
      }
      // Block is not full
      assert(SLAB_SIZE > accounted_for);

      // Walk bump-free-list-segment accounting for unused space
      uint16_t curr = head;
      while ((curr & 1) != 1)
      {
        // Check we are looking at a correctly aligned block
        uint16_t start = remove_cache_friendly_offset(curr, sizeclass);
        assert((start - offset) % size == 0);

        // Account for free elements in free list
        accounted_for += size;
        assert(SLAB_SIZE >= accounted_for);
        // We should never reach the link node in the free list.
        assert(curr != link);

        // Iterate bump/free list segment
        curr = *reinterpret_cast<uint16_t*>(pointer_offset(slab, curr));
      }

      if (curr != 1)
      {
        // Check we terminated traversal on a correctly aligned block
        uint16_t start = remove_cache_friendly_offset(curr & ~1, sizeclass);
        assert((start - offset) % size == 0);

        // Account for the space that is yet to be bump allocated
        accounted_for += SLAB_SIZE - (curr - 1);

        // The link should be the first allocation as we
        // haven't completely filled this block at any point.
        assert(link == get_initial_link(sizeclass, is_short));
      }

      assert(!is_full());
      // Add the link node.
      accounted_for += size;

      // All space accounted for
      assert(SLAB_SIZE == accounted_for);
#else
      UNUSED(slab);
      UNUSED(is_short);
#endif
    }
  };
} // namespace snmalloc

@ -1,360 +0,0 @@
#pragma once

#include "../ds/bits.h"
#include "../ds/helpers.h"

#include <atomic>
#include <utility>

namespace snmalloc
{
  static constexpr size_t PAGEMAP_NODE_BITS = 16;
  static constexpr size_t PAGEMAP_NODE_SIZE = 1ULL << PAGEMAP_NODE_BITS;

  /**
   * Structure describing the configuration of a pagemap. When querying a
   * pagemap from a different instantiation of snmalloc, the pagemap is
   * exposed as a `void*`. This structure allows the caller to check whether
   * the pagemap is of the format that they expect.
   */
  struct PagemapConfig
  {
    /**
     * The version of the pagemap structure. This is always 1 in existing
     * versions of snmalloc. This will be incremented every time the format
     * changes in an incompatible way. Changes to the format may add fields
     * to the end of this structure.
     */
    uint32_t version;
    /**
     * Is this a flat pagemap? If this field is false, the pagemap is the
     * hierarchical structure.
     */
    bool is_flat_pagemap;
    /**
     * Number of bytes in a pointer.
     */
    uint8_t sizeof_pointer;
    /**
     * The number of bits of the address used to index into the pagemap.
     */
    uint64_t pagemap_bits;
    /**
     * The size (in bytes) of a pagemap entry.
     */
    size_t size_of_entry;
  };

  template<size_t GRANULARITY_BITS, typename T, T default_content>
  class Pagemap
  {
  private:
    static constexpr size_t COVERED_BITS =
      bits::ADDRESS_BITS - GRANULARITY_BITS;
    static constexpr size_t CONTENT_BITS =
      bits::next_pow2_bits_const(sizeof(T));

    static_assert(
      PAGEMAP_NODE_BITS - CONTENT_BITS < COVERED_BITS,
      "Should use the FlatPagemap as it does not require a tree");

    static constexpr size_t BITS_FOR_LEAF = PAGEMAP_NODE_BITS - CONTENT_BITS;
    static constexpr size_t ENTRIES_PER_LEAF = 1 << BITS_FOR_LEAF;
    static constexpr size_t LEAF_MASK = ENTRIES_PER_LEAF - 1;

    static constexpr size_t BITS_PER_INDEX_LEVEL =
      PAGEMAP_NODE_BITS - POINTER_BITS;
    static constexpr size_t ENTRIES_PER_INDEX_LEVEL = 1 << BITS_PER_INDEX_LEVEL;
    static constexpr size_t ENTRIES_MASK = ENTRIES_PER_INDEX_LEVEL - 1;

    static constexpr size_t INDEX_BITS =
      BITS_FOR_LEAF > COVERED_BITS ? 0 : COVERED_BITS - BITS_FOR_LEAF;

    static constexpr size_t INDEX_LEVELS = INDEX_BITS / BITS_PER_INDEX_LEVEL;
    static constexpr size_t TOPLEVEL_BITS =
      INDEX_BITS - (INDEX_LEVELS * BITS_PER_INDEX_LEVEL);
    static constexpr size_t TOPLEVEL_ENTRIES = 1 << TOPLEVEL_BITS;
    static constexpr size_t TOPLEVEL_SHIFT =
      (INDEX_LEVELS * BITS_PER_INDEX_LEVEL) + BITS_FOR_LEAF + GRANULARITY_BITS;

    // Value used to mark an entry whose node is currently being added.
    static constexpr InvalidPointer<1> LOCKED_ENTRY{};

    struct Leaf
    {
      std::atomic<T> values[ENTRIES_PER_LEAF];
    };

    struct PagemapEntry
    {
      std::atomic<PagemapEntry*> entries[ENTRIES_PER_INDEX_LEVEL];
    };

    static_assert(
      sizeof(PagemapEntry) == sizeof(Leaf), "Should be the same size");

    static_assert(
      sizeof(PagemapEntry) == PAGEMAP_NODE_SIZE, "Should be the same size");

    // Init is removed as it is not required: this is only ever a global.
    // cl generates a memset of zero for an initialiser, which would be a
    // problem during libc/ucrt bring-up; on ucrt that memset would run after
    // the first allocation.
    // TODO: It is fragile that this relies on not being memset, and we
    // should review it to ensure we don't get bitten by this in the future.
    std::atomic<PagemapEntry*> top[TOPLEVEL_ENTRIES]; // = {nullptr};

    template<bool create_addr>
    inline PagemapEntry* get_node(std::atomic<PagemapEntry*>* e, bool& result)
    {
      // The pagemap nodes are all allocated directly from the OS, zero
      // initialised by a system call. We don't need any ordering guarantees
      // to see that correctly.
      PagemapEntry* value = e->load(std::memory_order_relaxed);

      if ((value == nullptr) || (value == LOCKED_ENTRY))
      {
        if constexpr (create_addr)
        {
          value = nullptr;

          if (e->compare_exchange_strong(
                value, LOCKED_ENTRY, std::memory_order_relaxed))
          {
            auto& v = default_memory_provider;
            value = v.alloc_chunk<PagemapEntry, OS_PAGE_SIZE>();
            e->store(value, std::memory_order_release);
          }
          else
          {
            while (address_cast(e->load(std::memory_order_relaxed)) ==
                   LOCKED_ENTRY)
            {
              bits::pause();
            }
            value = e->load(std::memory_order_acquire);
          }
        }
        else
        {
          result = false;
          return nullptr;
        }
      }
      result = true;
      return value;
    }

    template<bool create_addr>
    inline std::pair<Leaf*, size_t> get_leaf_index(uintptr_t addr, bool& result)
    {
#ifdef FreeBSD_KERNEL
      // Zero the top 16 bits - kernel addresses all have them set, but the
      // data structure assumes that they're zero.
      addr &= 0xffffffffffffULL;
#endif
      size_t ix = addr >> TOPLEVEL_SHIFT;
      size_t shift = TOPLEVEL_SHIFT;
      std::atomic<PagemapEntry*>* e = &top[ix];

      for (size_t i = 0; i < INDEX_LEVELS; i++)
      {
        PagemapEntry* value = get_node<create_addr>(e, result);
        if (!result)
          return std::pair(nullptr, 0);

        shift -= BITS_PER_INDEX_LEVEL;
        ix = (addr >> shift) & ENTRIES_MASK;
        e = &value->entries[ix];

        if constexpr (INDEX_LEVELS == 1)
        {
          UNUSED(i);
          break;
        }
        i++;

        if (i == INDEX_LEVELS)
          break;
      }

      Leaf* leaf = reinterpret_cast<Leaf*>(get_node<create_addr>(e, result));

      if (!result)
        return std::pair(nullptr, 0);

      shift -= BITS_FOR_LEAF;
      ix = (addr >> shift) & LEAF_MASK;
      return std::pair(leaf, ix);
    }

    template<bool create_addr>
    inline std::atomic<T>* get_addr(uintptr_t p, bool& success)
    {
      auto leaf_ix = get_leaf_index<create_addr>(p, success);
      return &(leaf_ix.first->values[leaf_ix.second]);
    }

    std::atomic<T>* get_ptr(uintptr_t p)
    {
      bool success;
      return get_addr<true>(p, success);
    }

  public:
    /**
     * The pagemap configuration describing this instantiation of the
     * template.
     */
    static constexpr PagemapConfig config = {
      1, false, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)};

    /**
     * Cast a `void*` to a pointer to this template instantiation, given a
     * config describing the configuration. Return null if the configuration
     * passed does not correspond to this template instantiation.
     *
     * This is intended to allow code that depends on the pagemap having a
     * specific representation to fail gracefully.
     */
    static Pagemap* cast_to_pagemap(void* pm, const PagemapConfig* c)
    {
      if (
        (c->version != 1) || (c->is_flat_pagemap) ||
        (c->sizeof_pointer != sizeof(uintptr_t)) ||
        (c->pagemap_bits != GRANULARITY_BITS) ||
        (c->size_of_entry != sizeof(T)) || (!std::is_integral_v<T>))
      {
        return nullptr;
      }
      return static_cast<Pagemap*>(pm);
    }

    /**
     * Returns the index of a pagemap entry within a given page. This is used
     * in code that propagates changes to the pagemap elsewhere.
     */
    size_t index_for_address(uintptr_t p)
    {
      bool success;
      return (OS_PAGE_SIZE - 1) &
        reinterpret_cast<size_t>(get_addr<true>(p, success));
    }

    /**
     * Returns the address of the page containing the pagemap entry for `p`.
     */
    void* page_for_address(uintptr_t p)
    {
      bool success;
      return reinterpret_cast<void*>(
        ~(OS_PAGE_SIZE - 1) &
        reinterpret_cast<uintptr_t>(get_addr<true>(p, success)));
    }

    T get(uintptr_t p)
    {
      bool success;
      auto addr = get_addr<false>(p, success);
      if (!success)
        return default_content;
      return addr->load(std::memory_order_relaxed);
    }

    void set(uintptr_t p, T x)
    {
      bool success;
      auto addr = get_addr<true>(p, success);
      addr->store(x, std::memory_order_relaxed);
    }

    void set_range(uintptr_t p, T x, size_t length)
    {
      bool success;
      do
      {
        auto leaf_ix = get_leaf_index<true>(p, success);
        size_t ix = leaf_ix.second;

        auto last = bits::min(LEAF_MASK + 1, ix + length);

        auto diff = last - ix;

        for (; ix < last; ix++)
        {
          SNMALLOC_ASSUME(leaf_ix.first != nullptr);
          leaf_ix.first->values[ix] = x;
        }

        length = length - diff;
        p = p + (diff << GRANULARITY_BITS);
      } while (length > 0);
    }
  };

  /**
   * Simple pagemap that for each GRANULARITY_BITS of the address range
   * stores a T.
   **/
  template<size_t GRANULARITY_BITS, typename T>
  class FlatPagemap
  {
  private:
    static constexpr size_t COVERED_BITS =
      bits::ADDRESS_BITS - GRANULARITY_BITS;
    static constexpr size_t CONTENT_BITS =
      bits::next_pow2_bits_const(sizeof(T));
    static constexpr size_t ENTRIES = 1ULL << (COVERED_BITS + CONTENT_BITS);
    static constexpr size_t SHIFT = GRANULARITY_BITS;

    std::atomic<T> top[ENTRIES];

  public:
    /**
     * The pagemap configuration describing this instantiation of the
     * template.
     */
    static constexpr PagemapConfig config = {
      1, true, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)};

    /**
     * Cast a `void*` to a pointer to this template instantiation, given a
     * config describing the configuration. Return null if the configuration
     * passed does not correspond to this template instantiation.
     *
     * This is intended to allow code that depends on the pagemap having a
     * specific representation to fail gracefully.
     */
    static FlatPagemap* cast_to_pagemap(void* pm, const PagemapConfig* c)
    {
      if (
        (c->version != 1) || (!c->is_flat_pagemap) ||
        (c->sizeof_pointer != sizeof(uintptr_t)) ||
        (c->pagemap_bits != GRANULARITY_BITS) ||
        (c->size_of_entry != sizeof(T)) || (!std::is_integral_v<T>))
      {
        return nullptr;
      }
      return static_cast<FlatPagemap*>(pm);
    }

    T get(uintptr_t p)
    {
      return top[p >> SHIFT].load(std::memory_order_relaxed);
    }

    void set(uintptr_t p, T x)
    {
      top[p >> SHIFT].store(x, std::memory_order_relaxed);
    }

    void set_range(uintptr_t p, T x, size_t length)
    {
      size_t index = p >> SHIFT;
      do
      {
        top[index].store(x, std::memory_order_relaxed);
        index++;
        length--;
      } while (length > 0);
    }
  };
} // namespace snmalloc

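// A minimal usage sketch for the pagemap (not from the original tree):
// `SuperslabMap` and the 24-bit granularity are illustrative values only.
//
//   using SuperslabMap = snmalloc::Pagemap<24, uint8_t, 0>;
//   SuperslabMap map; // in reality a global, so it starts zero-initialised
//
//   void tag(void* superslab)
//   {
//     // Mark one 16MiB granule; get() on any address in it returns 1.
//     map.set(snmalloc::address_cast(superslab), 1);
//   }
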
@ -1,98 +0,0 @@
#pragma once

#include "../ds/flaglock.h"
#include "../ds/mpmcstack.h"
#include "pooled.h"

namespace snmalloc
{
  /**
   * Pool of a particular type of object.
   *
   * This pool will never return objects to the OS. It maintains a list of
   * all objects ever allocated that can be iterated (not concurrency safe).
   * Pooled types can be acquired from the pool, and released back to the
   * pool. This is concurrency safe.
   *
   * This is used to bootstrap the allocation of allocators.
   **/
  template<class T, class MemoryProvider = GlobalVirtual>
  class Pool
  {
  private:
    friend Pooled<T>;
    template<typename TT>
    friend class MemoryProviderStateMixin;

    std::atomic_flag lock = ATOMIC_FLAG_INIT;
    MPMCStack<T, PreZeroed> stack;
    T* list = nullptr;

    Pool(MemoryProvider& m) : memory_provider(m) {}

  public:
    MemoryProvider& memory_provider;

    static Pool* make(MemoryProvider& memory_provider) noexcept
    {
      return memory_provider.template alloc_chunk<Pool, 0, MemoryProvider&>(
        memory_provider);
    }

    static Pool* make() noexcept
    {
      return make(default_memory_provider);
    }

    template<typename... Args>
    T* acquire(Args&&... args)
    {
      T* p = stack.pop();

      if (p != nullptr)
        return p;

      p = memory_provider
            .template alloc_chunk<T, bits::next_pow2_const(sizeof(T))>(
              std::forward<Args>(args)...);

      FlagLock f(lock);
      p->list_next = list;
      list = p;

      return p;
    }

    void release(T* p)
    {
      // The object's destructor is not run. If the object is "reallocated",
      // it is returned without the constructor being run, so the object is
      // reused without re-initialisation.
      stack.push(p);
    }

    T* extract(T* p = nullptr)
    {
      // Returns a linked list of all objects in the stack, emptying the
      // stack.
      if (p == nullptr)
        return stack.pop_all();

      return p->next;
    }

    void restore(T* first, T* last)
    {
      // Pushes a linked list of objects onto the stack. Use to put a linked
      // list returned by extract back onto the stack.
      stack.push(first, last);
    }

    T* iterate(T* p = nullptr)
    {
      if (p == nullptr)
        return list;

      return p->list_next;
    }
  };
} // namespace snmalloc

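// A minimal usage sketch for Pool (not from the original tree): `Widget` is
// a hypothetical pooled type; it must derive from Pooled<Widget> (defined in
// pooled.h below) so the pool can link entries through it.
//
//   class Widget : public snmalloc::Pooled<Widget>
//   { /* ... */ };
//
//   auto* pool = snmalloc::Pool<Widget>::make();
//   Widget* w = pool->acquire(); // pops a free one, or allocates a chunk
//   pool->release(w);            // destructor is NOT run; w may be reused
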
@ -1,21 +0,0 @@
#pragma once

#include "../ds/bits.h"

#include <atomic>

namespace snmalloc
{
  template<class T>
  class Pooled
  {
  private:
    template<class TT, class MemoryProvider>
    friend class Pool;
    template<class TT, Construction c>
    friend class MPMCStack;

    /// Used by the pool for chaining together entries when not in use.
    std::atomic<T*> next = nullptr;
    /// Used by the pool to keep the list of all entries ever created.
    T* list_next;
  };
} // namespace snmalloc

@ -1,50 +0,0 @@
#pragma once

#include "../ds/mpscq.h"
#include "../mem/allocconfig.h"
#include "../mem/sizeclass.h"

#include <atomic>

namespace snmalloc
{
  struct Remote
  {
    using alloc_id_t = size_t;
    union
    {
      std::atomic<Remote*> next;
      Remote* non_atomic_next;
    };

    alloc_id_t allocator_id;

    void set_target_id(alloc_id_t id)
    {
      allocator_id = id;
    }

    alloc_id_t target_id()
    {
      return allocator_id;
    }
  };

  static_assert(
    sizeof(Remote) <= MIN_ALLOC_SIZE,
    "Needs to be able to fit in smallest allocation.");

  struct RemoteAllocator
  {
    using alloc_id_t = Remote::alloc_id_t;
    // Store the message queue on a separate cacheline. It is mutable data
    // that is read by other threads.
    alignas(CACHELINE_SIZE) MPSCQ<Remote> message_queue;

    alloc_id_t id()
    {
      return static_cast<alloc_id_t>(
        reinterpret_cast<uintptr_t>(&message_queue));
    }
  };
} // namespace snmalloc

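// Routing sketch (not from the original tree): a remote deallocation is
// stamped with the owning allocator's id, which is simply the address of
// that allocator's message queue, so the sender can find the right queue.
//
//   void send_home(snmalloc::Remote* r, snmalloc::RemoteAllocator* owner)
//   {
//     r->set_target_id(owner->id()); // id() == address of owner's queue
//     // ... batch r and later post it to owner->message_queue ...
//   }
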
@ -1,171 +0,0 @@
#pragma once

#include "allocconfig.h"

namespace snmalloc
{
  constexpr static uint16_t get_initial_bumpptr(uint8_t sc, bool is_short);
  constexpr static uint16_t get_initial_link(uint8_t sc, bool is_short);
  constexpr static size_t sizeclass_to_size(uint8_t sizeclass);
  constexpr static size_t sizeclass_to_cache_friendly_mask(uint8_t sizeclass);
  constexpr static size_t sizeclass_to_inverse_cache_friendly_mask(uint8_t sc);
  constexpr static uint16_t medium_slab_free(uint8_t sizeclass);

  static inline uint8_t size_to_sizeclass(size_t size)
  {
    // Don't use sizeclasses that are not a multiple of the alignment.
    // For example, 24 byte allocations can be
    // problematic for some data due to alignment issues.
    return static_cast<uint8_t>(
      bits::to_exp_mant<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(size));
  }

  constexpr static inline uint8_t size_to_sizeclass_const(size_t size)
  {
    // Don't use sizeclasses that are not a multiple of the alignment.
    // For example, 24 byte allocations can be
    // problematic for some data due to alignment issues.
    return static_cast<uint8_t>(
      bits::to_exp_mant_const<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(size));
  }

  constexpr static inline size_t large_sizeclass_to_size(uint8_t large_class)
  {
    return bits::one_at_bit(large_class + SUPERSLAB_BITS);
  }

  // Small classes range from [MIN, SLAB], i.e. inclusive.
  static constexpr size_t NUM_SMALL_CLASSES =
    size_to_sizeclass_const(bits::one_at_bit(SLAB_BITS)) + 1;

  static constexpr size_t NUM_SIZECLASSES =
    size_to_sizeclass_const(SUPERSLAB_SIZE);

  // Medium classes range from (SLAB, SUPERSLAB), i.e. non-inclusive.
  static constexpr size_t NUM_MEDIUM_CLASSES =
    NUM_SIZECLASSES - NUM_SMALL_CLASSES;

  // Large classes range from [SUPERSLAB, ADDRESS_SPACE).
  static constexpr size_t NUM_LARGE_CLASSES =
    bits::ADDRESS_BITS - SUPERSLAB_BITS;

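  // Spacing sketch for the exp/mant encoding (illustrative, not from the
  // original source): with INTERMEDIATE_BITS == 2, each power-of-two range
  // is split into four sizeclasses, so above 64 bytes the rounded sizes run
  // 64, 80, 96, 112, 128, 160, 192, 224, 256, ...
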
  inline static size_t round_by_sizeclass(size_t rsize, size_t offset)
  {
    // check_same<NUM_LARGE_CLASSES, Globals::num_large_classes>();
    // Must be called with a rounded size.
    assert(sizeclass_to_size(size_to_sizeclass(rsize)) == rsize);
    // Only works up to certain offsets; exhaustively tested up to
    // SUPERSLAB_SIZE.
    assert(offset <= SUPERSLAB_SIZE);

    size_t align = bits::ctz(rsize);
    size_t divider = rsize >> align;
    // Maximum of 24 bits for 16MiB super/medium slab
    if (INTERMEDIATE_BITS == 0 || divider == 1)
    {
      assert(divider == 1);
      return offset & ~(rsize - 1);
    }

    if constexpr (bits::is64() && INTERMEDIATE_BITS <= 2)
    {
      // Only works with 64-bit multiplication, as the following would
      // overflow in 32-bit.
      // The code uses reciprocal division with a shift of 26 bits; this
      // is considerably more bits than we need in the result. If SUPERSLABS
      // get larger then we should review this code.
      static_assert(SUPERSLAB_BITS <= 24, "The following code assumes 24 bits");
      static constexpr size_t shift = 26;
      size_t back_shift = shift + align;
      static constexpr size_t mul_shift = 1ULL << shift;
      static constexpr uint32_t constants[8] = {
        0, mul_shift, 0, (mul_shift / 3) + 1, 0, (mul_shift / 5) + 1, 0,
        (mul_shift / 7) + 1};
      return ((constants[divider] * offset) >> back_shift) * rsize;
    }
    else
      // Use 32-bit division, as it is considerably faster than 64-bit, and
      // everything fits into 32 bits here.
      return static_cast<uint32_t>(offset / rsize) * rsize;
  }
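
  // Worked example for the reciprocal division above (illustrative numbers,
  // not from the original source): take rsize = 48, so align = 4 and
  // divider = 3, and offset = 100. Then
  //   constants[3]         = (2^26 / 3) + 1 = 22369622
  //   22369622 * 100       = 2236962200
  //   2236962200 >> (26+4) = 2    (i.e. offset / 48)
  //   2 * 48               = 96  == round_by_sizeclass(48, 100)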

  inline static bool is_multiple_of_sizeclass(size_t rsize, size_t offset)
  {
    // Must be called with a rounded size.
    assert(sizeclass_to_size(size_to_sizeclass(rsize)) == rsize);
    // Only works up to certain offsets; exhaustively tested up to
    // SUPERSLAB_SIZE.
    assert(offset <= SUPERSLAB_SIZE);

    size_t align = bits::ctz(rsize);
    size_t divider = rsize >> align;
    // Maximum of 24 bits for 16MiB super/medium slab
    if (INTERMEDIATE_BITS == 0 || divider == 1)
    {
      assert(divider == 1);
      return (offset & (rsize - 1)) == 0;
    }

    if constexpr (bits::is64() && INTERMEDIATE_BITS <= 2)
    {
      // Only works with 64-bit multiplication, as the following would
      // overflow in 32-bit.
      // The code uses reciprocal division with a shift of 31 bits; this
      // is considerably more bits than we need in the result. If SUPERSLABS
      // get larger then we should review this code.
      static_assert(SUPERSLAB_BITS <= 24, "The following code assumes 24 bits");
      static constexpr size_t shift = 31;
      static constexpr size_t mul_shift = 1ULL << shift;
      static constexpr uint32_t constants[8] = {
        0, mul_shift, 0, (mul_shift / 3) + 1, 0, (mul_shift / 5) + 1, 0,
        (mul_shift / 7) + 1};

      // There is a long chain of zeros after the backshift;
      // however, not all bits are zero, so just check a range.
      // This is exhaustively tested for the current use case.
      return (((constants[divider] * offset)) &
              (((1ULL << (align + 3)) - 1) << (shift - 3))) == 0;
    }
    else
      // Use 32-bit division, as it is considerably faster than 64-bit, and
      // everything fits into 32 bits here.
      return static_cast<uint32_t>(offset % rsize) == 0;
  }

#ifdef CACHE_FRIENDLY_OFFSET
  inline static void* remove_cache_friendly_offset(void* p, uint8_t sizeclass)
  {
    size_t mask = sizeclass_to_inverse_cache_friendly_mask(sizeclass);
    return p = (void*)((uintptr_t)p & mask);
  }

  inline static uint16_t
  remove_cache_friendly_offset(uint16_t relative, uint8_t sizeclass)
  {
    size_t mask = sizeclass_to_inverse_cache_friendly_mask(sizeclass);
    return relative & mask;
  }
#else
  inline static void* remove_cache_friendly_offset(void* p, uint8_t sizeclass)
  {
    UNUSED(sizeclass);
    return p;
  }

  inline static uint16_t
  remove_cache_friendly_offset(uint16_t relative, uint8_t sizeclass)
  {
    UNUSED(sizeclass);
    return relative;
  }
#endif
} // namespace snmalloc

@ -1,115 +0,0 @@
#pragma once

#include "../ds/helpers.h"
#include "superslab.h"

namespace snmalloc
{
  struct SizeClassTable
  {
    ModArray<NUM_SIZECLASSES, size_t> size;
    ModArray<NUM_SIZECLASSES, size_t> cache_friendly_mask;
    ModArray<NUM_SIZECLASSES, size_t> inverse_cache_friendly_mask;
    ModArray<NUM_SMALL_CLASSES, uint16_t> bump_ptr_start;
    ModArray<NUM_SMALL_CLASSES, uint16_t> short_bump_ptr_start;
    ModArray<NUM_SMALL_CLASSES, uint16_t> initial_link_ptr;
    ModArray<NUM_SMALL_CLASSES, uint16_t> short_initial_link_ptr;
    ModArray<NUM_MEDIUM_CLASSES, uint16_t> medium_slab_slots;

    constexpr SizeClassTable()
    : size(),
      cache_friendly_mask(),
      inverse_cache_friendly_mask(),
      bump_ptr_start(),
      short_bump_ptr_start(),
      initial_link_ptr(),
      short_initial_link_ptr(),
      medium_slab_slots()
    {
      for (uint8_t sizeclass = 0; sizeclass < NUM_SIZECLASSES; sizeclass++)
      {
        size[sizeclass] =
          bits::from_exp_mant<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(sizeclass);

        size_t alignment = bits::min(
          bits::one_at_bit(bits::ctz_const(size[sizeclass])), OS_PAGE_SIZE);
        cache_friendly_mask[sizeclass] = (alignment - 1);
        inverse_cache_friendly_mask[sizeclass] = ~(alignment - 1);
      }

      size_t header_size = sizeof(Superslab);
      size_t short_slab_size = SLAB_SIZE - header_size;

      for (uint8_t i = 0; i < NUM_SMALL_CLASSES; i++)
      {
        // We align to the end of the block to remove special cases for the
        // short block. Calculate remainders.
        size_t short_correction = short_slab_size % size[i];
        size_t correction = SLAB_SIZE % size[i];

        // First element in the block is the link.
        initial_link_ptr[i] = static_cast<uint16_t>(correction);
        short_initial_link_ptr[i] =
          static_cast<uint16_t>(header_size + short_correction);

        // Move to the object after the link.
        auto short_after_link = short_initial_link_ptr[i] + size[i];
        size_t after_link = initial_link_ptr[i] + size[i];

        // Bump ptr has bottom bit set.
        // In case we only have one object on this slab, check for wrap
        // around.
        short_bump_ptr_start[i] =
          static_cast<uint16_t>((short_after_link + 1) % SLAB_SIZE);
        bump_ptr_start[i] = static_cast<uint16_t>((after_link + 1) % SLAB_SIZE);
      }

      for (uint8_t i = NUM_SMALL_CLASSES; i < NUM_SIZECLASSES; i++)
      {
        medium_slab_slots[i - NUM_SMALL_CLASSES] = static_cast<uint16_t>(
          (SUPERSLAB_SIZE - Mediumslab::header_size()) / size[i]);
      }
    }
  };

  static constexpr SizeClassTable sizeclass_metadata = SizeClassTable();

  static inline constexpr uint16_t
  get_initial_bumpptr(uint8_t sc, bool is_short)
  {
    if (is_short)
      return sizeclass_metadata.short_bump_ptr_start[sc];

    return sizeclass_metadata.bump_ptr_start[sc];
  }

  static inline constexpr uint16_t get_initial_link(uint8_t sc, bool is_short)
  {
    if (is_short)
      return sizeclass_metadata.short_initial_link_ptr[sc];

    return sizeclass_metadata.initial_link_ptr[sc];
  }

  constexpr static inline size_t sizeclass_to_size(uint8_t sizeclass)
  {
    return sizeclass_metadata.size[sizeclass];
  }

  constexpr static inline size_t
  sizeclass_to_cache_friendly_mask(uint8_t sizeclass)
  {
    return sizeclass_metadata.cache_friendly_mask[sizeclass];
  }

  constexpr static inline size_t
  sizeclass_to_inverse_cache_friendly_mask(uint8_t sizeclass)
  {
    return sizeclass_metadata.inverse_cache_friendly_mask[sizeclass];
  }

  constexpr static inline uint16_t medium_slab_free(uint8_t sizeclass)
  {
    return sizeclass_metadata.medium_slab_slots[(sizeclass - NUM_SMALL_CLASSES)];
  }
} // namespace snmalloc

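// Worked example for the table above (illustrative numbers, not from the
// original source): for a 48-byte sizeclass with SLAB_SIZE = 64KiB,
//   correction       = 65536 % 48 = 16
//   initial_link_ptr = 16          (the link block sits at offset 16)
//   after_link       = 16 + 48 = 64
//   bump_ptr_start   = 65          (offset 64 with the bump tag bit set)
// so the last 48-byte object ends exactly at the end of the slab.
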
@ -1,166 +0,0 @@
#pragma once

#include "superslab.h"

namespace snmalloc
{
  class Slab
  {
  private:
    uint16_t pointer_to_index(void* p)
    {
      // Get the offset from the slab for a memory location.
      return static_cast<uint16_t>(address_cast(p) - address_cast(this));
    }

  public:
    static Slab* get(void* p)
    {
      return pointer_cast<Slab>(address_cast(p) & SLAB_MASK);
    }

    Metaslab& get_meta()
    {
      Superslab* super = Superslab::get(this);
      return super->get_meta(this);
    }

    SlabLink* get_link()
    {
      return get_meta().get_link(this);
    }

    template<ZeroMem zero_mem, typename MemoryProvider>
    void* alloc(SlabList* sc, size_t rsize, MemoryProvider& memory_provider)
    {
      // Read the head from the metadata stored in the superslab.
      Metaslab& meta = get_meta();
      uint16_t head = meta.head;

      assert(rsize == sizeclass_to_size(meta.sizeclass));
      meta.debug_slab_invariant(is_short(), this);
      assert(sc->get_head() == (SlabLink*)((size_t)this + meta.link));
      assert(!meta.is_full());

      meta.add_use();

      void* p;

      if ((head & 1) == 0)
      {
        void* node = pointer_offset(this, head);

        // Read the next slot from the memory that's about to be allocated.
        uint16_t next = *static_cast<uint16_t*>(node);
        meta.head = next;

        p = remove_cache_friendly_offset(node, meta.sizeclass);
      }
      else
      {
        if (meta.head == 1)
        {
          p = pointer_offset(this, meta.link);
          sc->pop();
          meta.set_full();
        }
        else
        {
          // This slab is being bump allocated.
          p = pointer_offset(this, head - 1);
          meta.head = (head + static_cast<uint16_t>(rsize)) & (SLAB_SIZE - 1);
        }
      }

      meta.debug_slab_invariant(is_short(), this);

      if constexpr (zero_mem == YesZero)
      {
        if (rsize < PAGE_ALIGNED_SIZE)
          memory_provider.zero(p, rsize);
        else
          memory_provider.template zero<true>(p, rsize);
      }

      return p;
    }

    bool is_start_of_object(Superslab* super, void* p)
    {
      Metaslab& meta = super->get_meta(this);
      return is_multiple_of_sizeclass(
        sizeclass_to_size(meta.sizeclass),
        address_cast(this) + SLAB_SIZE - address_cast(p));
    }

    // Returns whether this deallocation changed the superslab's status
    // (i.e. whether it alters get_status).
    template<typename MemoryProvider>
    inline typename Superslab::Action dealloc(
      SlabList* sc, Superslab* super, void* p, MemoryProvider& memory_provider)
    {
      Metaslab& meta = super->get_meta(this);

      bool was_full = meta.is_full();
      meta.debug_slab_invariant(is_short(), this);
      meta.sub_use();

      if (was_full)
      {
        // We are not on the sizeclass list.
        if (!meta.is_unused())
        {
          // Update the head and the sizeclass link.
          uint16_t index = pointer_to_index(p);
          assert(meta.head == 1);
          meta.link = index;

          // Push on the list of slabs for this sizeclass.
          sc->insert(meta.get_link(this));
          meta.debug_slab_invariant(is_short(), this);
        }
        else
        {
          // Dealloc on the superslab.
          if (is_short())
            return super->dealloc_short_slab(memory_provider);

          return super->dealloc_slab(this, memory_provider);
        }
      }
      else if (meta.is_unused())
      {
        // Remove from the sizeclass list and dealloc on the superslab.
        sc->remove(meta.get_link(this));

        if (is_short())
          return super->dealloc_short_slab(memory_provider);

        return super->dealloc_slab(this, memory_provider);
      }
      else
      {
#ifndef NDEBUG
        sc->debug_check_contains(meta.get_link(this));
#endif

        // Update the head and the next pointer in the free list.
        uint16_t head = meta.head;
        uint16_t current = pointer_to_index(p);

        // Set the head to the memory being deallocated.
        meta.head = current;
        assert(meta.valid_head(is_short()));

        // Set the next pointer to the previous head.
        *static_cast<uint16_t*>(p) = head;
        meta.debug_slab_invariant(is_short(), this);
      }
      return Superslab::NoSlabReturn;
    }

    bool is_short()
    {
      return (address_cast(this) & SUPERSLAB_MASK) == address_cast(this);
    }
  };
} // namespace snmalloc

@ -1,68 +0,0 @@
#include "globalalloc.h"

namespace snmalloc
{
  /**
   * RAII wrapper around an `Alloc`. This class gets an allocator from the
   * global pool and wraps it so that `Alloc` methods can be called
   * directly via the `->` operator on this class. When this object is
   * destroyed, it returns the allocator to the global pool.
   *
   * This does not depend on thread-local storage working, so can be used for
   * bootstrapping.
   */
  struct SlowAllocator
  {
    /**
     * The allocator that this wrapper will use.
     */
    Alloc* alloc;
    /**
     * Constructor. Claims an allocator from the global pool.
     */
    SlowAllocator() : alloc(current_alloc_pool()->acquire()) {}
    /**
     * Copying is not supported; it could easily lead to accidental sharing
     * of allocators.
     */
    SlowAllocator(const SlowAllocator&) = delete;
    /**
     * Moving is not supported, though it would be easy to add if there's a
     * use case for it.
     */
    SlowAllocator(SlowAllocator&&) = delete;
    /**
     * Copying is not supported; it could easily lead to accidental sharing
     * of allocators.
     */
    SlowAllocator& operator=(const SlowAllocator&) = delete;
    /**
     * Moving is not supported, though it would be easy to add if there's a
     * use case for it.
     */
    SlowAllocator& operator=(SlowAllocator&&) = delete;
    /**
     * Destructor. Returns the allocator to the pool.
     */
    ~SlowAllocator()
    {
      current_alloc_pool()->release(alloc);
    }
    /**
     * Arrow operator, allows methods exposed by `Alloc` to be called on the
     * wrapper.
     */
    Alloc* operator->()
    {
      return alloc;
    }
  };
  /**
   * Returns a new slow allocator. When the `SlowAllocator` goes out of
   * scope, the underlying `Alloc` will be returned to the pool.
   */
  inline SlowAllocator get_slow_allocator()
  {
    return SlowAllocator{};
  }
} // namespace snmalloc

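// A minimal usage sketch (not from the original tree): the RAII wrapper
// above lets bootstrap code allocate without thread-local storage. It
// assumes `Alloc` exposes `alloc`/`dealloc` as defined in alloc.h.
//
//   {
//     auto a = snmalloc::get_slow_allocator();
//     void* p = a->alloc(64);
//     a->dealloc(p);
//   } // allocator handed back to the pool here
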
@ -1,245 +0,0 @@
#pragma once

#include "../ds/helpers.h"
#include "allocslab.h"
#include "metaslab.h"

#include <new>

namespace snmalloc
{
  class Superslab : public Allocslab
  {
    // This is the view of a 16 mb superslab when it is being used to
    // allocate 64 kb slabs.
  private:
    friend DLList<Superslab>;

    // Keep the allocator pointer on a separate cache line. It is read by
    // other threads, and does not change, so we avoid false sharing.
    alignas(CACHELINE_SIZE)
      // The superslab is kept on a doubly linked list of superslabs which
      // have some space.
      Superslab* next;
    Superslab* prev;

// This is a reference to the first unused slab in the free slab list
|
||||
// It is does not contain the short slab, which is handled using a bit
|
||||
// in the "used" field below. The list is terminated by pointing to
|
||||
// the short slab.
|
||||
// The head linked list has an absolute pointer for head, but the next
|
||||
// pointers stores in the metaslabs are relative pointers, that is they
|
||||
// are the relative offset to the next entry minus 1. This means that
|
||||
// all zeros is a list that chains through all the blocks, so the zero
|
||||
// initialised memory requires no more work.
|
||||
Mod<SLAB_COUNT, uint8_t> head;
|
||||
|
||||
// Represents twice the number of full size slabs used
|
||||
// plus 1 for the short slab. i.e. using 3 slabs and the
|
||||
// short slab would be 6 + 1 = 7
|
||||
uint16_t used;
|
||||
|
||||
ModArray<SLAB_COUNT, Metaslab> meta;
|
||||
|
||||
// Used size_t as results in better code in MSVC
|
||||
size_t slab_to_index(Slab* slab)
|
||||
{
|
||||
auto res = ((address_cast(slab) - address_cast(this)) >> SLAB_BITS);
|
||||
assert(res == (uint8_t)res);
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
enum Status
|
||||
{
|
||||
Full,
|
||||
Available,
|
||||
OnlyShortSlabAvailable,
|
||||
Empty
|
||||
};
|
||||
|
||||
enum Action
|
||||
{
|
||||
NoSlabReturn = 0,
|
||||
NoStatusChange = 1,
|
||||
StatusChange = 2
|
||||
};
|
||||
|
||||
static Superslab* get(void* p)
|
||||
{
|
||||
return pointer_cast<Superslab>(address_cast(p) & SUPERSLAB_MASK);
|
||||
}
|
||||
|
||||
static bool is_short_sizeclass(uint8_t sizeclass)
|
||||
{
|
||||
constexpr uint8_t h = size_to_sizeclass_const(sizeof(Superslab));
|
||||
return sizeclass <= h;
|
||||
}
|
||||
|
||||
void init(RemoteAllocator* alloc)
|
||||
{
|
||||
allocator = alloc;
|
||||
|
||||
if (kind != Super)
|
||||
{
|
||||
// If this wasn't previously a Superslab, we need to set up the
|
||||
// header.
|
||||
kind = Super;
|
||||
// Point head at the first non-short slab.
|
||||
head = 1;
|
||||
|
||||
if (kind != Fresh)
|
||||
{
|
||||
// If this wasn't previously Fresh, we need to zero some things.
|
||||
used = 0;
|
||||
for (size_t i = 0; i < SLAB_COUNT; i++)
|
||||
{
|
||||
new (&(meta[i])) Metaslab();
|
||||
}
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
auto curr = head;
|
||||
for (size_t i = 0; i < SLAB_COUNT - used - 1; i++)
|
||||
{
|
||||
curr = (curr + meta[curr].next + 1) & (SLAB_COUNT - 1);
|
||||
}
|
||||
assert(curr == 0);
|
||||
|
||||
for (size_t i = 0; i < SLAB_COUNT; i++)
|
||||
{
|
||||
assert(meta[i].is_unused());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool is_empty()
|
||||
{
|
||||
return used == 0;
|
||||
}
|
||||
|
||||
bool is_full()
|
||||
{
|
||||
return (used == (((SLAB_COUNT - 1) << 1) + 1));
|
||||
}
|
||||
|
||||
bool is_almost_full()
|
||||
{
|
||||
return (used >= ((SLAB_COUNT - 1) << 1));
|
||||
}
|
||||
|
||||
Status get_status()
|
||||
{
|
||||
if (!is_almost_full())
|
||||
{
|
||||
if (!is_empty())
|
||||
{
|
||||
return Available;
|
||||
}
|
||||
|
||||
return Empty;
|
||||
}
|
||||
|
||||
if (!is_full())
|
||||
{
|
||||
return OnlyShortSlabAvailable;
|
||||
}
|
||||
|
||||
return Full;
|
||||
}
|
||||
|
||||
Metaslab& get_meta(Slab* slab)
|
||||
{
|
||||
return meta[slab_to_index(slab)];
|
||||
}
|
||||
|
||||
template<typename MemoryProvider>
|
||||
Slab* alloc_short_slab(uint8_t sizeclass, MemoryProvider& memory_provider)
|
||||
{
|
||||
if ((used & 1) == 1)
|
||||
return alloc_slab(sizeclass, memory_provider);
|
||||
|
||||
meta[0].head = get_initial_bumpptr(sizeclass, true);
|
||||
meta[0].sizeclass = sizeclass;
|
||||
meta[0].link = get_initial_link(sizeclass, true);
|
||||
|
||||
if constexpr (decommit_strategy == DecommitAll)
|
||||
{
|
||||
memory_provider.template notify_using<NoZero>(
|
||||
pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
|
||||
}
|
||||
|
||||
used++;
|
||||
return (Slab*)this;
|
||||
}
|
||||
|
||||
template<typename MemoryProvider>
|
||||
Slab* alloc_slab(uint8_t sizeclass, MemoryProvider& memory_provider)
|
||||
{
|
||||
uint8_t h = head;
|
||||
Slab* slab = pointer_cast<Slab>(
|
||||
address_cast(this) + (static_cast<size_t>(h) << SLAB_BITS));
|
||||
|
||||
uint8_t n = meta[h].next;
|
||||
|
||||
meta[h].head = get_initial_bumpptr(sizeclass, false);
|
||||
meta[h].sizeclass = sizeclass;
|
||||
meta[h].link = get_initial_link(sizeclass, false);
|
||||
|
||||
head = h + n + 1;
|
||||
used += 2;
|
||||
|
||||
if constexpr (decommit_strategy == DecommitAll)
|
||||
{
|
||||
memory_provider.template notify_using<NoZero>(slab, SLAB_SIZE);
|
||||
}
|
||||
|
||||
return slab;
|
||||
}
|
||||
|
||||
// Returns true, if this alters the value of get_status
|
||||
template<typename MemoryProvider>
|
||||
Action dealloc_slab(Slab* slab, MemoryProvider& memory_provider)
|
||||
{
|
||||
// This is not the short slab.
|
||||
uint8_t index = static_cast<uint8_t>(slab_to_index(slab));
|
||||
uint8_t n = head - index - 1;
|
||||
|
||||
meta[index].sizeclass = 0;
|
||||
meta[index].next = n;
|
||||
head = index;
|
||||
bool was_almost_full = is_almost_full();
|
||||
used -= 2;
|
||||
|
||||
if constexpr (decommit_strategy == DecommitAll)
|
||||
memory_provider.notify_not_using(slab, SLAB_SIZE);
|
||||
|
||||
assert(meta[index].is_unused());
|
||||
if (was_almost_full || is_empty())
|
||||
return StatusChange;
|
||||
|
||||
return NoStatusChange;
|
||||
}
|
||||
|
||||
// Returns true, if this alters the value of get_status
|
||||
template<typename MemoryProvider>
|
||||
Action dealloc_short_slab(MemoryProvider& memory_provider)
|
||||
{
|
||||
// This is the short slab.
|
||||
if constexpr (decommit_strategy == DecommitAll)
|
||||
{
|
||||
memory_provider.notify_not_using(
|
||||
pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
|
||||
}
|
||||
|
||||
bool was_full = is_full();
|
||||
used--;
|
||||
|
||||
assert(meta[0].is_unused());
|
||||
if (was_full || is_empty())
|
||||
return StatusChange;
|
||||
|
||||
return NoStatusChange;
|
||||
}
|
||||
};
|
||||
} // namespace snmalloc
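
The "offset minus one" next encoding above can be modelled in isolation: with every next field zero, each step advances by exactly one slot, so a freshly zeroed superslab already encodes a free list that chains through all the slabs and terminates at the short slab. A standalone check mirroring the debug walk in `init`:

#include <cassert>
#include <cstddef>
#include <cstdint>

int main()
{
  constexpr size_t SLAB_COUNT = 8;
  uint8_t next[SLAB_COUNT] = {}; // zero-initialised, like fresh memory
  uint8_t curr = 1;              // head points past the short slab

  // Offset-minus-one encoding: each step advances by next[curr] + 1.
  for (size_t i = 0; i < SLAB_COUNT - 1; i++)
    curr = (curr + next[curr] + 1) & (SLAB_COUNT - 1);

  assert(curr == 0); // the list terminates at the short slab
}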

@@ -1,313 +0,0 @@
#pragma once

#include "../ds/helpers.h"
#include "globalalloc.h"
#if defined(SNMALLOC_USE_THREAD_DESTRUCTOR) && \
  defined(SNMALLOC_USE_THREAD_CLEANUP)
#error At most one out of SNMALLOC_USE_THREAD_CLEANUP and SNMALLOC_USE_THREAD_DESTRUCTOR may be defined.
#endif

#if !defined(_WIN32) && !defined(FreeBSD_KERNEL)
#  include "pthread.h"
#endif

namespace snmalloc
{
  extern "C" void _malloc_thread_cleanup(void);

#ifdef SNMALLOC_EXTERNAL_THREAD_ALLOC
  /**
   * Version of the `ThreadAlloc` interface that does no management of thread
   * local state, and just assumes that "ThreadAllocUntyped::get" has been
   * declared before including snmalloc.h. As it is included before, it
   * cannot know the allocator type, hence the casting.
   *
   * This class is used only when snmalloc is compiled as part of a runtime,
   * which has its own management of the thread local allocator pointer.
   */
  class ThreadAllocUntypedWrapper
  {
  public:
    static inline Alloc*& get()
    {
      return (Alloc*&)ThreadAllocUntyped::get();
    }
  };
#endif

  /**
   * Version of the `ThreadAlloc` interface that uses a hook provided by libc
   * to destroy thread-local state. This is the ideal option, because it
   * enforces ordering of destruction such that the malloc state is destroyed
   * after anything that can allocate memory.
   *
   * This class is used only when snmalloc is compiled as part of a
   * compatible libc (for example, FreeBSD libc).
   */
  class ThreadAllocLibcCleanup
  {
    /**
     * Libc will call `_malloc_thread_cleanup` just before a thread
     * terminates. This function must be allowed to call back into this
     * class to destroy the state.
     */
    friend void _malloc_thread_cleanup(void);

    /**
     * Function called when the thread exits. This is guaranteed to be
     * called precisely once per thread and releases the current allocator.
     */
    static inline void exit()
    {
      // Take the pointer by reference so that clearing it below resets the
      // thread-local slot rather than a local copy.
      Alloc*& per_thread = get(false);
      if (per_thread != nullptr)
      {
        current_alloc_pool()->release(per_thread);
        per_thread = nullptr;
      }
    }

  public:
    /**
     * Returns a pointer to the allocator associated with this thread. If
     * `create` is true, it will create an allocator if one does not exist;
     * otherwise it returns `nullptr` in that case. This should be called
     * with `create == false` only during thread teardown.
     *
     * The non-create case exists so that the `per_thread` variable can be a
     * local static and not a global, allowing ODR to deduplicate it.
     */
    static inline Alloc*& get(bool create = true)
    {
      static thread_local Alloc* per_thread;
      if (!per_thread && create)
      {
        per_thread = current_alloc_pool()->acquire();
      }
      return per_thread;
    }
  };
  /**
   * Version of the `ThreadAlloc` interface that uses C++ `thread_local`
   * destructors for cleanup. If a per-thread allocator is used during the
   * destruction of other per-thread data, this class will create a new
   * instance and register its destructor, so should eventually result in
   * cleanup, but may result in allocators being returned to the global pool
   * and then reacquired multiple times.
   *
   * This implementation depends on nothing outside of a working C++
   * environment and so should be the simplest for initial bringup on an
   * unsupported platform. It is currently used in the FreeBSD kernel
   * version.
   */
  class ThreadAllocThreadDestructor
  {
    /**
     * A pointer to the allocator owned by this thread.
     */
    Alloc* alloc;

    /**
     * Constructor. Acquires a new allocator and associates it with this
     * object. There should be only one instance of this class per thread.
     */
    ThreadAllocThreadDestructor() : alloc(current_alloc_pool()->acquire()) {}

    /**
     * Destructor. Releases the allocator owned by this thread.
     */
    ~ThreadAllocThreadDestructor()
    {
      current_alloc_pool()->release(alloc);
    }

  public:
    /**
     * Public interface, returns the allocator for this thread, constructing
     * one if necessary.
     */
    static inline Alloc*& get()
    {
      static thread_local ThreadAllocThreadDestructor per_thread;
      return per_thread.alloc;
    }
  };
  // When targeting the FreeBSD kernel, the pthread header exists, but the
  // pthread symbols do not, so don't compile this because it will fail to
  // link.
#ifndef FreeBSD_KERNEL
  /**
   * Version of the `ThreadAlloc` interface that uses thread-specific (POSIX
   * threads) or fibre-local (Windows) storage with an explicit destructor.
   * Neither of the underlying mechanisms guarantees ordering, so the cleanup
   * may be called before other cleanup functions or thread-local
   * destructors.
   *
   * This implementation is used when snmalloc is a library implementation of
   * malloc that is not embedded in the C standard library. Using this
   * implementation removes the dependency on a C++ runtime library.
   */
  class ThreadAllocExplicitTLSCleanup
  {
    /**
     * Cleanup function. This is registered with the operating system's
     * thread- or fibre-local storage subsystem to clean up the per-thread
     * allocator.
     */
    static inline void
#  ifdef _WIN32
      NTAPI
#  endif
      thread_alloc_release(void* p)
    {
      Alloc** pp = static_cast<Alloc**>(p);
      current_alloc_pool()->release(*pp);
      *pp = nullptr;
    }

#  ifdef _WIN32
    /**
     * Key type used to identify fibre-local storage.
     */
    using tls_key_t = DWORD;

    /**
     * On Windows, construct a new fibre-local storage allocation. This
     * function must not be called more than once.
     */
    static inline tls_key_t tls_key_create() noexcept
    {
      return FlsAlloc(thread_alloc_release);
    }

    /**
     * On Windows, store a pointer to a `thread_local` pointer to an
     * allocator into fibre-local storage. This function takes a pointer to
     * the `thread_local` allocation, rather than to the pointee, so that
     * the cleanup function can zero the pointer.
     *
     * This must not be called until after `tls_key_create` has returned.
     */
    static inline void tls_set_value(tls_key_t key, Alloc** value)
    {
      FlsSetValue(key, static_cast<void*>(value));
    }
#  else
    /**
     * Key type used for thread-specific storage.
     */
    using tls_key_t = pthread_key_t;

    /**
     * On POSIX systems, construct a new thread-specific storage allocation.
     * This function must not be called more than once.
     */
    static inline tls_key_t tls_key_create() noexcept
    {
      tls_key_t key;
      pthread_key_create(&key, thread_alloc_release);
      return key;
    }

    /**
     * On POSIX systems, store a pointer to a `thread_local` pointer to an
     * allocator into thread-specific storage. This function takes a pointer
     * to the `thread_local` allocation, rather than to the pointee, so that
     * the cleanup function can zero the pointer.
     *
     * This must not be called until after `tls_key_create` has returned.
     */
    static inline void tls_set_value(tls_key_t key, Alloc** value)
    {
      pthread_setspecific(key, static_cast<void*>(value));
    }
#  endif

    /**
     * Private accessor to the per-thread allocator.
     * Provides no checking for initialization.
     */
    static ALWAYSINLINE Alloc*& inner_get()
    {
      static thread_local Alloc* per_thread;
      return per_thread;
    }

#  ifdef USE_SNMALLOC_STATS
    static void print_stats()
    {
      Stats s;
      current_alloc_pool()->aggregate_stats(s);
      s.print<Alloc>(std::cout);
    }
#  endif

    /**
     * Private initialiser for the per-thread allocator.
     */
    static NOINLINE Alloc*& inner_init()
    {
      Alloc*& per_thread = inner_get();

      // If we don't have an allocator, construct one.
      if (!per_thread)
      {
        // Construct the allocator and assign it to `per_thread` *before*
        // doing anything else. This is important because `tls_key_create`
        // may allocate memory and, if we are providing the `malloc`
        // implementation, this function must be re-entrant within a single
        // thread. In that case, the second call to this function will
        // simply return the allocator.
        per_thread = current_alloc_pool()->acquire();

        bool first = false;
        tls_key_t key = Singleton<tls_key_t, tls_key_create>::get(&first);
        // Associate the new allocator with the destructor.
        tls_set_value(key, &per_thread);

#  ifdef USE_SNMALLOC_STATS
        // Allocator is up and running now, safe to call atexit.
        if (first)
        {
          atexit(print_stats);
        }
#  else
        UNUSED(first);
#  endif
      }
      return per_thread;
    }

  public:
    /**
     * Public interface, returns the allocator for the current thread,
     * constructing it if necessary.
     */
    static ALWAYSINLINE Alloc*& get()
    {
      Alloc*& per_thread = inner_get();

      if (per_thread != nullptr)
        return per_thread;

      // Slow path that performs initialization.
      return inner_init();
    }
  };
#endif

#ifdef SNMALLOC_USE_THREAD_CLEANUP
  /**
   * Entry point that allows libc to call into the allocator for per-thread
   * cleanup.
   */
  extern "C" void _malloc_thread_cleanup(void)
  {
    ThreadAllocLibcCleanup::exit();
  }
  using ThreadAlloc = ThreadAllocLibcCleanup;
#elif defined(SNMALLOC_USE_THREAD_DESTRUCTOR)
  using ThreadAlloc = ThreadAllocThreadDestructor;
#elif defined(SNMALLOC_EXTERNAL_THREAD_ALLOC)
  using ThreadAlloc = ThreadAllocUntypedWrapper;
#else
  using ThreadAlloc = ThreadAllocExplicitTLSCleanup;
#endif
} // namespace snmalloc
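
Whichever implementation `ThreadAlloc` resolves to, client code uses the same interface; a rough usage sketch (the 64-byte size is arbitrary):

#include "threadalloc.h"

void per_thread_example()
{
  // The first call on a thread acquires an allocator from the global pool
  // and registers whatever cleanup the selected implementation provides.
  auto* a = snmalloc::ThreadAlloc::get();
  void* p = a->alloc(64);
  a->dealloc(p, 64);
}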

@@ -1,269 +0,0 @@
#include "../mem/slowalloc.h"
#include "../snmalloc.h"

#include <errno.h>
#include <string.h>

using namespace snmalloc;

#ifndef SNMALLOC_EXPORT
#  define SNMALLOC_EXPORT
#endif

#ifndef SNMALLOC_NAME_MANGLE
#  define SNMALLOC_NAME_MANGLE(a) a
#endif

extern "C"
{
  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr)
  {
    return Alloc::external_pointer<OnePastEnd>(ptr);
  }

  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size)
  {
    // Include size 0 in the first sizeclass: for size == 0 the shift
    // contributes 1 (size - 1 wraps to the all-ones pattern), otherwise 0.
    size = ((size - 1) >> (bits::BITS - 1)) + size;

    return ThreadAlloc::get()->alloc(size);
  }

  SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr)
  {
    if (ptr == nullptr)
      return;

    ThreadAlloc::get()->dealloc(ptr);
  }

  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size)
  {
    bool overflow = false;
    size_t sz = bits::umul(size, nmemb, overflow);
    if (overflow)
    {
      errno = ENOMEM;
      return nullptr;
    }
    // Include size 0 in the first sizeclass.
    sz = ((sz - 1) >> (bits::BITS - 1)) + sz;
    return ThreadAlloc::get()->alloc<ZeroMem::YesZero>(sz);
  }

  SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)(void* ptr)
  {
    return Alloc::alloc_size(ptr);
  }

  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size)
  {
    if (size == (size_t)-1)
    {
      errno = ENOMEM;
      return nullptr;
    }
    if (ptr == nullptr)
    {
      return SNMALLOC_NAME_MANGLE(malloc)(size);
    }
    if (size == 0)
    {
      SNMALLOC_NAME_MANGLE(free)(ptr);
      return nullptr;
    }
#ifndef NDEBUG
    // This check is redundant, because the check in memcpy will fail if
    // this is skipped, but it's useful for debugging.
    if (Alloc::external_pointer<Start>(ptr) != ptr)
    {
      error(
        "Calling realloc on a pointer that is not the start of an allocation");
    }
#endif
    size_t sz = Alloc::alloc_size(ptr);
    // Keep the current allocation if the given size is in the same
    // sizeclass.
    if (sz == sizeclass_to_size(size_to_sizeclass(size)))
      return ptr;

    void* p = SNMALLOC_NAME_MANGLE(malloc)(size);
    if (p != nullptr)
    {
      assert(p == Alloc::external_pointer<Start>(p));
      sz = bits::min(size, sz);
      memcpy(p, ptr, sz);
      SNMALLOC_NAME_MANGLE(free)(ptr);
    }
    return p;
  }

#ifndef __FreeBSD__
  SNMALLOC_EXPORT void*
  SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size)
  {
    bool overflow = false;
    size_t sz = bits::umul(size, nmemb, overflow);
    if (overflow)
    {
      errno = ENOMEM;
      return nullptr;
    }
    return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz);
  }
#endif

  SNMALLOC_EXPORT void*
  SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size)
  {
    assert((size % alignment) == 0);
    (void)alignment;
    return SNMALLOC_NAME_MANGLE(malloc)(size);
  }

  SNMALLOC_EXPORT void*
  SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size)
  {
    if (
      (alignment == 0) || (alignment == size_t(-1)) ||
      (alignment > SUPERSLAB_SIZE))
    {
      errno = EINVAL;
      return nullptr;
    }
    if ((size + alignment) < size)
    {
      errno = ENOMEM;
      return nullptr;
    }

    size = bits::max(size, alignment);
    uint8_t sc = size_to_sizeclass(size);
    if (sc >= NUM_SIZECLASSES)
    {
      // Large allocations are 16 MiB aligned.
      return SNMALLOC_NAME_MANGLE(malloc)(size);
    }
    for (; sc < NUM_SIZECLASSES; sc++)
    {
      size = sizeclass_to_size(sc);
      // size & (~size + 1) isolates the lowest set bit of size, which
      // bounds the natural alignment of this sizeclass.
      if ((size & (~size + 1)) >= alignment)
      {
        return SNMALLOC_NAME_MANGLE(aligned_alloc)(alignment, size);
      }
    }
    return SNMALLOC_NAME_MANGLE(malloc)(SUPERSLAB_SIZE);
  }

  SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)(
    void** memptr, size_t alignment, size_t size)
  {
    if (
      ((alignment % sizeof(uintptr_t)) != 0) ||
      ((alignment & (alignment - 1)) != 0) || (alignment == 0))
    {
      return EINVAL;
    }

    void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size);
    if (p == nullptr)
    {
      return ENOMEM;
    }
    *memptr = p;
    return 0;
  }

#ifndef __FreeBSD__
  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size)
  {
    return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size);
  }
#endif

  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size)
  {
    if (size == size_t(-1))
    {
      errno = ENOMEM;
      return nullptr;
    }
    return SNMALLOC_NAME_MANGLE(memalign)(
      OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1));
  }

  // Stub implementations for jemalloc compatibility.
  // These are called by FreeBSD's libthr (pthreads) to notify malloc of
  // various events. They are currently unused, though we may wish to reset
  // statistics on fork if built with statistics.

  SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_prefork)(void) {}
  SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_postfork)(void) {}
  SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_first_thread)(void) {}

  SNMALLOC_EXPORT int
  SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t)
  {
    return ENOENT;
  }

#ifdef SNMALLOC_EXPOSE_PAGEMAP
  /**
   * Export the pagemap. The return value is a pointer to the pagemap
   * structure. The argument is used to return a pointer to a
   * `PagemapConfig` structure describing the type of the pagemap. Static
   * methods on the concrete pagemap templates can then be used to safely
   * cast the return from this function to the correct type. This allows us
   * to preserve some semblance of ABI safety via a pure C API.
   */
  SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(snmalloc_pagemap_global_get)(
    PagemapConfig const** config)
  {
    if (config)
    {
      *config = &decltype(snmalloc::global_pagemap)::config;
      assert(
        decltype(snmalloc::global_pagemap)::cast_to_pagemap(
          &snmalloc::global_pagemap, *config) == &snmalloc::global_pagemap);
    }
    return &snmalloc::global_pagemap;
  }
#endif

#ifdef SNMALLOC_EXPOSE_RESERVE
  SNMALLOC_EXPORT void*
  SNMALLOC_NAME_MANGLE(snmalloc_reserve_shared)(size_t* size, size_t align)
  {
    return snmalloc::default_memory_provider.reserve<true>(size, align);
  }
#endif

#if !defined(__PIC__) && !defined(NO_BOOTSTRAP_ALLOCATOR)
  // The following functions are required to work before TLS is set up, in
  // statically-linked programs. These temporarily grab an allocator from
  // the pool and return it.

  void* __je_bootstrap_malloc(size_t size)
  {
    return get_slow_allocator()->alloc(size);
  }

  void* __je_bootstrap_calloc(size_t nmemb, size_t size)
  {
    bool overflow = false;
    size_t sz = bits::umul(size, nmemb, overflow);
    if (overflow)
    {
      errno = ENOMEM;
      return nullptr;
    }
    // Include size 0 in the first sizeclass.
    sz = ((sz - 1) >> (bits::BITS - 1)) + sz;
    return get_slow_allocator()->alloc<ZeroMem::YesZero>(sz);
  }

  void __je_bootstrap_free(void* ptr)
  {
    get_slow_allocator()->dealloc(ptr);
  }
#endif
}
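
The branch-free clamp used by `malloc`, `calloc`, and the bootstrap functions above can be checked in isolation; this sketch assumes `bits::BITS` is the bit width of `size_t`:

#include <cassert>
#include <cstddef>

int main()
{
  constexpr size_t BITS = sizeof(size_t) * 8;
  auto clamp = [](size_t size) {
    // For size == 0, size - 1 wraps to SIZE_MAX, whose top bit is set, so
    // the shift contributes 1; for any non-zero size it contributes 0.
    return ((size - 1) >> (BITS - 1)) + size;
  };
  assert(clamp(0) == 1); // zero-sized requests land in the first sizeclass
  assert(clamp(1) == 1);
  assert(clamp(4096) == 4096);
}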

@@ -1,67 +0,0 @@
#include "../mem/alloc.h"
#include "../mem/threadalloc.h"
#include "../snmalloc.h"

#ifdef _WIN32
#  define EXCEPTSPEC
#else
#  ifdef _GLIBCXX_USE_NOEXCEPT
#    define EXCEPTSPEC _GLIBCXX_USE_NOEXCEPT
#  elif defined(_NOEXCEPT)
#    define EXCEPTSPEC _NOEXCEPT
#  else
#    define EXCEPTSPEC
#  endif
#endif

using namespace snmalloc;

void* operator new(size_t size)
{
  return ThreadAlloc::get()->alloc(size);
}

void* operator new[](size_t size)
{
  return ThreadAlloc::get()->alloc(size);
}

void* operator new(size_t size, std::nothrow_t&)
{
  return ThreadAlloc::get()->alloc(size);
}

void* operator new[](size_t size, std::nothrow_t&)
{
  return ThreadAlloc::get()->alloc(size);
}

void operator delete(void* p) EXCEPTSPEC
{
  ThreadAlloc::get()->dealloc(p);
}

void operator delete(void* p, size_t size) EXCEPTSPEC
{
  ThreadAlloc::get()->dealloc(p, size);
}

void operator delete(void* p, std::nothrow_t&)
{
  ThreadAlloc::get()->dealloc(p);
}

void operator delete[](void* p) EXCEPTSPEC
{
  ThreadAlloc::get()->dealloc(p);
}

void operator delete[](void* p, size_t size) EXCEPTSPEC
{
  ThreadAlloc::get()->dealloc(p, size);
}

void operator delete[](void* p, std::nothrow_t&)
{
  ThreadAlloc::get()->dealloc(p);
}
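
With these operators linked in, ordinary C++ allocation is routed through the per-thread allocator; a trivial usage sketch:

#include <vector>

void new_delete_example()
{
  auto* v = new std::vector<int>(1024); // global operator new above
  delete v; // sized operator delete above (C++14 sized deallocation)
}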

@@ -1,53 +0,0 @@
#pragma once

#include "pal_consts.h"

namespace snmalloc
{
  void error(const char* const str);
} // namespace snmalloc

// If simulating OE, we need the underlying platform.
#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION)
#  include "pal_apple.h"
#  include "pal_free_bsd_kernel.h"
#  include "pal_freebsd.h"
#  include "pal_linux.h"
#  include "pal_windows.h"
#endif
#include "pal_open_enclave.h"
#include "pal_plain.h"

namespace snmalloc
{
#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION)
  using DefaultPal =
#  if defined(_WIN32)
    PALWindows;
#  elif defined(__APPLE__)
    PALApple;
#  elif defined(__linux__)
    PALLinux;
#  elif defined(FreeBSD_KERNEL)
    PALFreeBSDKernel;
#  elif defined(__FreeBSD__)
    PALFBSD;
#  else
#    error Unsupported platform
#  endif
#endif

  using Pal =
#if defined(SNMALLOC_MEMORY_PROVIDER)
    PALPlainMixin<SNMALLOC_MEMORY_PROVIDER>;
#elif defined(OPEN_ENCLAVE)
    PALPlainMixin<PALOpenEnclave>;
#else
    DefaultPal;
#endif

  inline void error(const char* const str)
  {
    Pal::error(str);
  }
} // namespace snmalloc

@@ -1,89 +0,0 @@
#pragma once

#ifdef __APPLE__
#  include "../ds/bits.h"
#  include "../mem/allocconfig.h"

#  include <pthread.h>
#  include <strings.h>
#  include <sys/mman.h>

extern "C" int puts(const char* str);

namespace snmalloc
{
  /**
   * PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...).
   */
  class PALApple
  {
  public:
    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports.
     */
    static constexpr uint64_t pal_features = 0;
    static void error(const char* const str)
    {
      puts(str);
      abort();
    }

    /// Notify platform that we will not be using these pages.
    void notify_not_using(void* p, size_t size) noexcept
    {
      assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
      madvise(p, size, MADV_FREE);
    }

    /// Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size) noexcept
    {
      assert(
        bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
      if constexpr (zero_mem == YesZero)
        zero(p, size);
    }

    /// OS-specific function for zeroing memory. Remapping a page-aligned
    /// block gets fresh zeroed pages lazily instead of writing zeroes.
    template<bool page_aligned = false>
    void zero(void* p, size_t size) noexcept
    {
      if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
      {
        assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
        void* r = mmap(
          p,
          size,
          PROT_READ | PROT_WRITE,
          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
          -1,
          0);

        if (r != MAP_FAILED)
          return;
      }

      bzero(p, size);
    }

    template<bool committed>
    void* reserve(size_t* size) noexcept
    {
      void* p = mmap(
        NULL,
        *size,
        PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS,
        -1,
        0);

      if (p == MAP_FAILED)
        error("Out of memory");

      return p;
    }
  };
} // namespace snmalloc
#endif

@@ -1,45 +0,0 @@
#pragma once

#include <cstdint>

namespace snmalloc
{
  /**
   * Flags in a bitfield of optional features that a PAL may support. These
   * should be set in the PAL's `pal_features` static constexpr field.
   */
  enum PalFeatures : uint64_t
  {
    /**
     * This PAL supports low memory notifications. It must implement a
     * `low_memory_epoch` method that returns a `uint64_t` of the number of
     * times that a low-memory notification has been raised and an
     * `expensive_low_memory_check()` method that returns a `bool`
     * indicating whether low memory conditions are still in effect.
     */
    LowMemoryNotification = (1 << 0),
    /**
     * This PAL natively supports allocation with a guaranteed alignment.
     * If this is not supported, then we will over-allocate and round the
     * allocation.
     *
     * A PAL that supports this must expose a `request()` method that takes
     * a size and alignment. A PAL that does *not* support it must expose a
     * `request()` method that takes only a size.
     */
    AlignedAllocation = (1 << 1)
  };
  /**
   * Flag indicating whether requested memory should be zeroed.
   */
  enum ZeroMem
  {
    /**
     * Memory should not be zeroed; contents are undefined.
     */
    NoZero,
    /**
     * Memory must be zeroed. This can be lazily allocated via a
     * copy-on-write mechanism as long as any load from the memory returns
     * zero.
     */
    YesZero
  };
} // namespace snmalloc
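
A sketch of how a client can test these feature bits at compile time; `SomePal` stands in for any concrete PAL exposing a `pal_features` field:

#include "pal_consts.h"

template<typename SomePal>
constexpr bool supports_aligned_allocation()
{
  return (SomePal::pal_features & snmalloc::AlignedAllocation) != 0;
}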

@@ -1,95 +0,0 @@
#pragma once

#include "../ds/bits.h"
#include "../mem/allocconfig.h"

#if defined(FreeBSD_KERNEL)
extern "C"
{
#  include <sys/vmem.h>
#  include <vm/vm.h>
#  include <vm/vm_extern.h>
#  include <vm/vm_kern.h>
#  include <vm/vm_object.h>
#  include <vm/vm_param.h>
}

namespace snmalloc
{
  class PALFreeBSDKernel
  {
    vm_offset_t get_vm_offset(void* p)
    {
      return static_cast<vm_offset_t>(reinterpret_cast<uintptr_t>(p));
    }

  public:
    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports.
     */
    static constexpr uint64_t pal_features = AlignedAllocation;
    void error(const char* const str)
    {
      panic("snmalloc error: %s", str);
    }

    /// Notify platform that we will not be using these pages.
    void notify_not_using(void* p, size_t size)
    {
      vm_offset_t addr = get_vm_offset(p);
      kmem_unback(kernel_object, addr, size);
    }

    /// Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size)
    {
      vm_offset_t addr = get_vm_offset(p);
      int flags = M_WAITOK | ((zero_mem == YesZero) ? M_ZERO : 0);
      if (kmem_back(kernel_object, addr, size, flags) != KERN_SUCCESS)
      {
        error("Out of memory");
      }
    }

    /// OS-specific function for zeroing memory.
    template<bool page_aligned = false>
    void zero(void* p, size_t size)
    {
      ::bzero(p, size);
    }

    template<bool committed>
    void* reserve(size_t* size, size_t align)
    {
      size_t request = *size;
      vm_offset_t addr;
      if (vmem_xalloc(
            kernel_arena,
            request,
            align,
            0,
            0,
            VMEM_ADDR_MIN,
            VMEM_ADDR_MAX,
            M_BESTFIT,
            &addr))
      {
        return nullptr;
      }
      if (committed)
      {
        if (
          kmem_back(kernel_object, addr, request, M_ZERO | M_WAITOK) !=
          KERN_SUCCESS)
        {
          vmem_xfree(kernel_arena, addr, request);
          return nullptr;
        }
      }
      return reinterpret_cast<void*>(addr);
    }
  };
} // namespace snmalloc
#endif

@@ -1,95 +0,0 @@
#pragma once

#if defined(__FreeBSD__) && !defined(_KERNEL)
#  include "../ds/bits.h"
#  include "../mem/allocconfig.h"

#  include <stdio.h>
#  include <strings.h>
#  include <sys/mman.h>

namespace snmalloc
{
  class PALFBSD
  {
  public:
    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports.
     */
    static constexpr uint64_t pal_features = AlignedAllocation;
    static void error(const char* const str)
    {
      puts(str);
      abort();
    }

    /// Notify platform that we will not be using these pages.
    void notify_not_using(void* p, size_t size) noexcept
    {
      assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
      madvise(p, size, MADV_FREE);
    }

    /// Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size) noexcept
    {
      assert(
        bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
      if constexpr (zero_mem == YesZero)
        zero(p, size);
    }

    /// OS-specific function for zeroing memory.
    template<bool page_aligned = false>
    void zero(void* p, size_t size) noexcept
    {
      if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
      {
        assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
        void* r = mmap(
          p,
          size,
          PROT_READ | PROT_WRITE,
          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
          -1,
          0);

        if (r != MAP_FAILED)
          return;
      }

      bzero(p, size);
    }

    template<bool committed>
    void* reserve(const size_t* size, size_t align) noexcept
    {
      size_t request = *size;
      // Alignment must be a power of 2.
      assert(align == bits::next_pow2(align));

      if (align == 0)
      {
        align = 1;
      }

      size_t log2align = bits::next_pow2_bits(align);

      void* p = mmap(
        nullptr,
        request,
        PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(log2align),
        -1,
        0);

      if (p == MAP_FAILED)
        error("Out of memory");

      return p;
    }
  };
} // namespace snmalloc
#endif

@@ -1,87 +0,0 @@
#pragma once

#if defined(__linux__)
#  include "../ds/bits.h"
#  include "../mem/allocconfig.h"

#  include <string.h>
#  include <sys/mman.h>

extern "C" int puts(const char* str);

namespace snmalloc
{
  class PALLinux
  {
  public:
    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports.
     */
    static constexpr uint64_t pal_features = 0;
    static void error(const char* const str)
    {
      puts(str);
      abort();
    }

    /// Notify platform that we will not be using these pages.
    void notify_not_using(void* p, size_t size) noexcept
    {
      assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
      // Do nothing. Don't call madvise here, as the system call slows the
      // allocator down too much.
      UNUSED(p);
      UNUSED(size);
    }

    /// Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size) noexcept
    {
      assert(
        bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));

      if constexpr (zero_mem == YesZero)
        zero<true>(p, size);
      else
      {
        UNUSED(p);
        UNUSED(size);
      }
    }

    /// OS-specific function for zeroing memory. For whole pages of an
    /// anonymous mapping, MADV_DONTNEED makes the kernel supply fresh
    /// zeroed pages on the next touch instead of writing zeroes eagerly.
    template<bool page_aligned = false>
    void zero(void* p, size_t size) noexcept
    {
      if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
      {
        assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
        madvise(p, size, MADV_DONTNEED);
      }
      else
      {
        ::memset(p, 0, size);
      }
    }

    template<bool committed>
    void* reserve(const size_t* size) noexcept
    {
      void* p = mmap(
        nullptr,
        *size,
        PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS,
        -1,
        0);

      if (p == MAP_FAILED)
        error("Out of memory");

      return p;
    }
  };
} // namespace snmalloc
#endif

@@ -1,63 +0,0 @@
#pragma once

#include "pal_plain.h"
#ifdef OPEN_ENCLAVE
extern "C" const void* __oe_get_heap_base();
extern "C" const void* __oe_get_heap_end();
extern "C" void* oe_memset(void* p, int c, size_t size);
extern "C" void oe_abort();

namespace snmalloc
{
  class PALOpenEnclave
  {
    std::atomic<uintptr_t> oe_base;

  public:
    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports.
     */
    static constexpr uint64_t pal_features = AlignedAllocation;
    static void error(const char* const str)
    {
      UNUSED(str);
      oe_abort();
    }

    template<bool committed>
    void* reserve(size_t* size, size_t align) noexcept
    {
      if (oe_base == 0)
      {
        uintptr_t dummy = 0;
        oe_base.compare_exchange_strong(dummy, (uintptr_t)__oe_get_heap_base());
      }

      uintptr_t old_base = oe_base;
      uintptr_t old_base2 = old_base;
      uintptr_t next_base;
      auto end = (uintptr_t)__oe_get_heap_end();
      do
      {
        old_base2 = old_base;
        auto new_base = bits::align_up(old_base, align);
        next_base = new_base + *size;

        if (next_base > end)
          error("Out of memory");

        // Retry until the shared bump pointer is advanced atomically; a
        // failed compare-exchange reloads old_base with the current value.
      } while (!oe_base.compare_exchange_strong(old_base, next_base));

      *size = next_base - old_base2;
      return (void*)old_base;
    }

    template<bool page_aligned = false>
    void zero(void* p, size_t size) noexcept
    {
      oe_memset(p, 0, size);
    }
  };
} // namespace snmalloc
#endif
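
The reservation loop above is a compare-exchange bump allocator; a standalone model of the retry pattern, with the alignment step omitted for brevity:

#include <atomic>
#include <cassert>
#include <cstdint>

int main()
{
  std::atomic<uintptr_t> base{0x1000};
  uintptr_t old_base = base.load();
  uintptr_t next_base;
  do
  {
    // Recompute the reservation from the freshly observed base; a failed
    // compare_exchange_strong reloads old_base with the current value.
    next_base = old_base + 256;
  } while (!base.compare_exchange_strong(old_base, next_base));

  assert(base.load() == old_base + 256); // old_base is this caller's region
}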

@@ -1,32 +0,0 @@
#pragma once

#include "../ds/bits.h"
#include "../mem/allocconfig.h"

namespace snmalloc
{
  // Can be extended.
  // Will require a reserve method in subclasses.
  template<class State>
  class PALPlainMixin : public State
  {
  public:
    // Notify platform that we will not be using these pages.
    void notify_not_using(void*, size_t) noexcept {}

    // Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size) noexcept
    {
      if constexpr (zero_mem == YesZero)
      {
        State::zero(p, size);
      }
      else
      {
        UNUSED(p);
        UNUSED(size);
      }
    }
  };
} // namespace snmalloc
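
A sketch of how the mixin is meant to be composed: the `State` class supplies `zero` (and, for a complete PAL, a `reserve` method), while the mixin adds the `notify_*` no-ops. `ToyState` here is a hypothetical stand-in for a platform class such as `PALOpenEnclave`:

#include "pal_plain.h"

#include <cstring>

class ToyState
{
public:
  static constexpr uint64_t pal_features = 0;

  template<bool page_aligned = false>
  void zero(void* p, size_t size) noexcept
  {
    std::memset(p, 0, size);
  }
};

using ToyPal = snmalloc::PALPlainMixin<ToyState>;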

@@ -1,238 +0,0 @@
#pragma once

#include "../ds/bits.h"
#include "../mem/allocconfig.h"

#ifdef _WIN32
#  define WIN32_LEAN_AND_MEAN
#  define NOMINMAX
#  include <windows.h>
// VirtualAlloc2 is exposed in RS5 headers.
#  ifdef NTDDI_WIN10_RS5
#    if (NTDDI_VERSION >= NTDDI_WIN10_RS5) && \
      (WINVER >= _WIN32_WINNT_WIN10) && !defined(USE_SYSTEMATIC_TESTING)
#      define PLATFORM_HAS_VIRTUALALLOC2
#    endif
#  endif

namespace snmalloc
{
  class PALWindows
  {
    /**
     * The number of times that the memory pressure notification has fired.
     */
    static std::atomic<uint64_t> pressure_epoch;
    /**
     * A flag indicating that we have tried to register for low-memory
     * notifications.
     */
    static std::atomic<bool> registered_for_notifications;
    static HANDLE lowMemoryObject;
    /**
     * Callback, used when the system delivers a low-memory notification.
     * This simply increments an atomic counter each time the notification
     * is raised.
     */
    static void CALLBACK low_memory(_In_ PVOID, _In_ BOOLEAN)
    {
      pressure_epoch++;
    }

  public:
    PALWindows()
    {
      // No error handling here - if this doesn't work, then we will just
      // consume more memory. There's nothing sensible that we could do in
      // error handling. We also leak both the low memory notification
      // object handle and the wait object handle. We'll need them until
      // the program exits, so there's little point doing anything else.
      //
      // We only try to register once. If this fails, give up. Even if we
      // create multiple PAL objects, we don't want to get more than one
      // callback.
      if (!registered_for_notifications.exchange(true))
      {
        lowMemoryObject =
          CreateMemoryResourceNotification(LowMemoryResourceNotification);
        HANDLE waitObject;
        RegisterWaitForSingleObject(
          &waitObject,
          lowMemoryObject,
          low_memory,
          nullptr,
          INFINITE,
          WT_EXECUTEDEFAULT);
      }
    }

    /**
     * Bitmap of PalFeatures flags indicating the optional features that
     * this PAL supports. This PAL supports low-memory notifications.
     */
    static constexpr uint64_t pal_features = LowMemoryNotification
#  if defined(PLATFORM_HAS_VIRTUALALLOC2) || defined(USE_SYSTEMATIC_TESTING)
      | AlignedAllocation
#  endif
      ;

    /**
     * Counter value for the number of times that a low-memory notification
     * has been delivered. Callers should compare this with a previous
     * value to see if the low memory state has been triggered since they
     * last checked.
     */
    uint64_t low_memory_epoch()
    {
      return pressure_epoch.load(std::memory_order_acquire);
    }

    /**
     * Check whether the low memory state is still in effect. This is an
     * expensive operation and should not be on any fast paths.
     */
    bool expensive_low_memory_check()
    {
      BOOL result;
      QueryMemoryResourceNotification(lowMemoryObject, &result);
      return result;
    }

    static void error(const char* const str)
    {
      puts(str);
      fflush(stdout);
      abort();
    }

    /// Notify platform that we will not be using these pages.
    void notify_not_using(void* p, size_t size) noexcept
    {
      assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));

      BOOL ok = VirtualFree(p, size, MEM_DECOMMIT);

      if (!ok)
        error("VirtualFree failed");
    }

    /// Notify platform that we will be using these pages.
    template<ZeroMem zero_mem>
    void notify_using(void* p, size_t size) noexcept
    {
      assert(
        bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));

      void* r = VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE);

      if (r == nullptr)
        error("out of memory");
    }

    /// OS-specific function for zeroing memory. Decommitting and
    /// recommitting page-aligned blocks yields fresh zeroed pages.
    template<bool page_aligned = false>
    void zero(void* p, size_t size) noexcept
    {
      if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
      {
        assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
        notify_not_using(p, size);
        notify_using<YesZero>(p, size);
      }
      else
        ::memset(p, 0, size);
    }

#  ifdef USE_SYSTEMATIC_TESTING
    size_t& systematic_bump_ptr()
    {
      static size_t bump_ptr = (size_t)0x4000'0000'0000;
      return bump_ptr;
    }
    template<bool committed>
    void* reserve(size_t* size, size_t align) noexcept
    {
      DWORD flags = MEM_RESERVE;

      if (committed)
        flags |= MEM_COMMIT;

      size_t retries = 1000;
      void* p;
      size_t request = *size;

      do
      {
        p = VirtualAlloc(
          (void*)systematic_bump_ptr(), request, flags, PAGE_READWRITE);

        systematic_bump_ptr() += request;
        retries--;
      } while (p == nullptr && retries > 0);

      uintptr_t aligned_p = bits::align_up((size_t)p, align);

      if (aligned_p != (uintptr_t)p)
      {
        auto extra_bit = aligned_p - (uintptr_t)p;
        uintptr_t end = (uintptr_t)p + request;
        // Attempt to align the end of the block.
        VirtualAlloc((void*)end, extra_bit, flags, PAGE_READWRITE);
      }
      *size = request;
      return p;
    }
#  elif defined(PLATFORM_HAS_VIRTUALALLOC2)
    template<bool committed>
    void* reserve(size_t* size, size_t align) noexcept
    {
      DWORD flags = MEM_RESERVE;

      if (committed)
        flags |= MEM_COMMIT;

      // Windows doesn't let you request memory less than 64KB aligned.
      // Most operating systems will simply give you something more aligned
      // than you ask for, but Windows complains about invalid parameters.
      const size_t min_align = 64 * 1024;
      if (align < min_align)
        align = min_align;

      // If we're on Windows 10 or newer, we can use the VirtualAlloc2
      // function. The FromApp variant is useable by UWP applications and
      // cannot allocate executable memory.
      MEM_ADDRESS_REQUIREMENTS addressReqs = {0};
      MEM_EXTENDED_PARAMETER param = {0};
      addressReqs.Alignment = align;
      param.Type = MemExtendedParameterAddressRequirements;
      param.Pointer = &addressReqs;
      void* ret = VirtualAlloc2FromApp(
        nullptr, nullptr, *size, flags, PAGE_READWRITE, &param, 1);
      if (ret == nullptr)
      {
        error("Failed to allocate memory\n");
      }
      return ret;
    }
#  else
    template<bool committed>
    void* reserve(size_t* size) noexcept
    {
      DWORD flags = MEM_RESERVE;

      if (committed)
        flags |= MEM_COMMIT;

      void* ret = VirtualAlloc(nullptr, *size, flags, PAGE_READWRITE);
      if (ret == nullptr)
      {
        error("Failed to allocate memory\n");
      }
      return ret;
    }
#  endif
  };
  HEADER_GLOBAL std::atomic<uint64_t> PALWindows::pressure_epoch;
  HEADER_GLOBAL std::atomic<bool> PALWindows::registered_for_notifications;
  HEADER_GLOBAL HANDLE PALWindows::lowMemoryObject;
} // namespace snmalloc
#endif
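
The Windows reserve paths above lean on power-of-two rounding; a standalone check of the semantics assumed of `bits::align_up`, namely `align_up(x, a) == (x + a - 1) & ~(a - 1)`:

#include <cassert>
#include <cstdint>

int main()
{
  auto align_up = [](uintptr_t x, uintptr_t a) {
    return (x + a - 1) & ~(a - 1); // a must be a power of two
  };
  assert(align_up(0x10001, 0x10000) == 0x20000); // round up to 64 KiB
  assert(align_up(0x20000, 0x10000) == 0x20000); // already aligned
}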

@@ -1,3 +0,0 @@
#pragma once

#include "mem/threadalloc.h"

@@ -1,52 +0,0 @@
#define OPEN_ENCLAVE
#define OPEN_ENCLAVE_SIMULATION
#define USE_RESERVE_MULTIPLE 1
#include <iostream>
#include <snmalloc.h>

void* oe_base;
void* oe_end;
extern "C" const void* __oe_get_heap_base()
{
  return oe_base;
}

extern "C" const void* __oe_get_heap_end()
{
  return oe_end;
}

extern "C" void* oe_memset(void* p, int c, size_t size)
{
  return memset(p, c, size);
}

extern "C" void oe_abort()
{
  abort();
}

using namespace snmalloc;
int main()
{
  MemoryProviderStateMixin<DefaultPal> mp;

  size_t size = 1ULL << 28;
  oe_base = mp.reserve<true>(&size, 0);
  oe_end = (uint8_t*)oe_base + size;
  std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;

  auto a = ThreadAlloc::get();

  for (size_t i = 0; i < 1000; i++)
  {
    auto r1 = a->alloc(100);
    std::cout << "Allocated object " << r1 << std::endl;

    if (oe_base > r1)
      abort();
    if (oe_end < r1)
      abort();
  }
}

@@ -1,119 +0,0 @@
#include <stdio.h>

#define SNMALLOC_NAME_MANGLE(a) our_##a
#include "../../../override/malloc.cc"

using namespace snmalloc;

void check_result(size_t size, size_t align, void* p, int err, bool null)
{
  if (errno != err)
    abort();

  if (null)
  {
    if (p != nullptr)
      abort();
  }
  else
  {
    if (our_malloc_usable_size(p) < size)
      abort();

    if (((uintptr_t)p % align) != 0)
      abort();

    our_free(p);
  }
}

void test_calloc(size_t nmemb, size_t size, int err, bool null)
{
  fprintf(stderr, "calloc(%d, %d)\n", (int)nmemb, (int)size);
  errno = 0;
  void* p = our_calloc(nmemb, size);

  if ((p != nullptr) && (errno == 0))
  {
    for (size_t i = 0; i < (size * nmemb); i++)
    {
      if (((uint8_t*)p)[i] != 0)
        abort();
    }
  }
  check_result(nmemb * size, 1, p, err, null);
}

void test_realloc(void* p, size_t size, int err, bool null)
{
  fprintf(stderr, "realloc(%p, %d)\n", p, (int)size);
  errno = 0;
  p = our_realloc(p, size);
  check_result(size, 1, p, err, null);
}

void test_posix_memalign(size_t size, size_t align, int err, bool null)
{
  fprintf(stderr, "posix_memalign(&p, %d, %d)\n", (int)align, (int)size);
  void* p = nullptr;
  errno = our_posix_memalign(&p, align, size);
  check_result(size, align, p, err, null);
}

void test_memalign(size_t size, size_t align, int err, bool null)
{
  fprintf(stderr, "memalign(%d, %d)\n", (int)align, (int)size);
  errno = 0;
  void* p = our_memalign(align, size);
  check_result(size, align, p, err, null);
}

int main(int argc, char** argv)
{
  UNUSED(argc);
  UNUSED(argv);

  constexpr int SUCCESS = 0;

  test_calloc(0, 0, SUCCESS, false);

  for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
  {
    const size_t size = sizeclass_to_size(sc);

    bool overflow = false;
    for (size_t n = 1; bits::umul(size, n, overflow) <= SUPERSLAB_SIZE; n *= 5)
    {
      if (overflow)
        break;

      test_calloc(n, size, SUCCESS, false);
      test_calloc(n, 0, SUCCESS, false);
    }
    test_calloc(0, size, SUCCESS, false);

    test_realloc(our_malloc(size), size, SUCCESS, false);
    test_realloc(our_malloc(size), 0, SUCCESS, true);
    test_realloc(nullptr, size, SUCCESS, false);
    test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true);
  }

  test_posix_memalign(0, 0, EINVAL, true);
  test_posix_memalign((size_t)-1, 0, EINVAL, true);

  for (size_t align = sizeof(size_t); align <= SUPERSLAB_SIZE; align <<= 1)
  {
    for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
    {
      const size_t size = sizeclass_to_size(sc);
      test_posix_memalign(size, align, SUCCESS, false);
      test_posix_memalign(size, 0, EINVAL, true);
      test_memalign(size, align, SUCCESS, false);
    }
    test_posix_memalign(0, align, SUCCESS, false);
    test_posix_memalign((size_t)-1, align, ENOMEM, true);
    test_posix_memalign(0, align + 1, EINVAL, true);
  }

  return 0;
}
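
The calloc tests above rely on `bits::umul` reporting multiplication overflow; a standalone model of that contract using the GCC/Clang `__builtin_mul_overflow` builtin (an assumption about the toolchain, not snmalloc's own implementation, and assuming a 64-bit `size_t`):

#include <cassert>
#include <cstddef>

int main()
{
  size_t out = 0;
  bool overflow =
    __builtin_mul_overflow(size_t(1) << 40, size_t(1) << 40, &out);
  assert(overflow); // 2^80 does not fit in 64 bits

  overflow = __builtin_mul_overflow(size_t(123), size_t(456), &out);
  assert(!overflow && out == 56088);
}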
|
|
@ -1,313 +0,0 @@
|
|||
#include <snmalloc.h>
|
||||
#include <test/opt.h>
|
||||
#include <test/xoroshiro.h>
|
||||
#include <unordered_set>
|
||||
|
||||
using namespace snmalloc;
|
||||
|
||||
void test_alloc_dealloc_64k()
|
||||
{
|
||||
auto* alloc = ThreadAlloc::get();
|
||||
|
||||
constexpr size_t count = 1 << 12;
|
||||
constexpr size_t outer_count = 12;
|
||||
void* garbage[count];
|
||||
void* keep_alive[outer_count];
|
||||
|
||||
for (size_t j = 0; j < outer_count; j++)
|
||||
{
|
||||
// Allocate 64k of 16byte allocs
|
||||
// This will fill the short slab, and then start a new slab.
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
garbage[i] = alloc->alloc(16);
|
||||
}
|
||||
|
||||
// Allocate one object on the second slab
|
||||
keep_alive[j] = alloc->alloc(16);
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
alloc->dealloc(garbage[i]);
|
||||
}
|
||||
}
|
||||
for (size_t j = 0; j < outer_count; j++)
|
||||
{
|
||||
alloc->dealloc(keep_alive[j]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_random_allocation()
|
||||
{
|
||||
auto* alloc = ThreadAlloc::get();
|
||||
std::unordered_set<void*> allocated;
|
||||
|
||||
constexpr size_t count = 10000;
|
||||
constexpr size_t outer_count = 10;
|
||||
void* objects[count];
|
||||
for (size_t i = 0; i < count; i++)
|
||||
objects[i] = nullptr;
|
||||
|
||||
// Randomly allocate and deallocate objects
|
||||
xoroshiro::p128r32 r;
|
||||
size_t alloc_count = 0;
|
||||
for (size_t j = 0; j < outer_count; j++)
|
||||
{
|
||||
auto just_dealloc = r.next() % 2 == 1;
|
||||
auto duration = r.next() % count;
|
||||
for (size_t i = 0; i < duration; i++)
|
||||
{
|
||||
auto index = r.next();
|
||||
auto& cell = objects[index % count];
|
||||
if (cell != nullptr)
|
||||
{
|
||||
alloc->dealloc(cell);
|
||||
allocated.erase(cell);
|
||||
cell = nullptr;
|
||||
alloc_count--;
|
||||
}
|
||||
if (!just_dealloc)
|
||||
{
|
||||
cell = alloc->alloc(16);
|
||||
auto pair = allocated.insert(cell);
|
||||
// Check not already allocated
|
||||
assert(pair.second);
|
||||
UNUSED(pair);
|
||||
alloc_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alloc_count == 0 && just_dealloc)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deallocate all the remaining objects
|
||||
for (size_t i = 0; i < count; i++)
|
||||
if (objects[i] != nullptr)
|
||||
alloc->dealloc(objects[i]);
|
||||
}
|
||||
|
||||
void test_calloc()
{
  auto* alloc = ThreadAlloc::get();

  for (size_t size = 16; size <= (1 << 24); size <<= 1)
  {
    void* p = alloc->alloc(size);
    memset(p, 0xFF, size);
    alloc->dealloc(p, size);

    p = alloc->alloc<YesZero>(size);

    for (size_t i = 0; i < size; i++)
    {
      if (((char*)p)[i] != 0)
        abort();
    }

    alloc->dealloc(p, size);
  }

  current_alloc_pool()->debug_check_empty();
}

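test_calloc exercises the ZeroMem template parameter, the allocator-level analogue of calloc. A minimal usage sketch, mirroring the calls above (assuming a snmalloc build on the include path):

auto* alloc = snmalloc::ThreadAlloc::get();
// NoZero (the default) may return uninitialised memory;
// YesZero guarantees every returned byte reads as zero.
void* p = alloc->alloc<snmalloc::YesZero>(256);
alloc->dealloc(p, 256);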
void test_double_alloc()
{
  auto* a1 = current_alloc_pool()->acquire();
  auto* a2 = current_alloc_pool()->acquire();

  const size_t n = (1 << 16) / 32;

  for (size_t k = 0; k < 4; k++)
  {
    std::unordered_set<void*> set1;
    std::unordered_set<void*> set2;

    for (size_t i = 0; i < (n * 2); i++)
    {
      void* p = a1->alloc(20);
      assert(set1.find(p) == set1.end());
      set1.insert(p);
    }

    for (size_t i = 0; i < (n * 2); i++)
    {
      void* p = a2->alloc(20);
      assert(set2.find(p) == set2.end());
      set2.insert(p);
    }

    // Free each set through the *other* allocator, exercising
    // remote deallocation.
    while (!set1.empty())
    {
      auto it = set1.begin();
      a2->dealloc(*it, 20);
      set1.erase(it);
    }

    while (!set2.empty())
    {
      auto it = set2.begin();
      a1->dealloc(*it, 20);
      set2.erase(it);
    }
  }

  current_alloc_pool()->release(a1);
  current_alloc_pool()->release(a2);
  current_alloc_pool()->debug_check_empty();
}

void test_external_pointer()
{
  // Malloc does not have an external pointer querying mechanism.
  auto* alloc = ThreadAlloc::get();

  for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
  {
    size_t size = sizeclass_to_size(sc);
    void* p1 = alloc->alloc(size);

    for (size_t offset = 0; offset < size; offset += 17)
    {
      void* p2 = (void*)((size_t)p1 + offset);
      void* p3 = Alloc::external_pointer(p2);
      void* p4 = Alloc::external_pointer<End>(p2);
      UNUSED(p3);
      UNUSED(p4);
      assert(p1 == p3);
      assert((size_t)p4 == (size_t)p1 + size - 1);
    }

    alloc->dealloc(p1, size);
  }

  current_alloc_pool()->debug_check_empty();
}

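For reference, the API under test: given any interior pointer into a live allocation, Alloc::external_pointer returns the start of that allocation, and the End variant returns its last byte — exactly what the asserts above check. A self-contained sketch of the same semantics (64 is a sizeclass size, so the allocation is exactly 64 bytes):

#include <cassert>
#include <snmalloc.h>

void example()
{
  auto* alloc = snmalloc::ThreadAlloc::get();
  char* p = (char*)alloc->alloc(64);

  // Recover the allocation bounds from an interior pointer.
  void* start = snmalloc::Alloc::external_pointer(p + 10);
  void* last = snmalloc::Alloc::external_pointer<snmalloc::End>(p + 10);
  assert(start == p);
  assert(last == p + 63);

  alloc->dealloc(p, 64);
}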
void check_offset(void* base, void* interior)
{
  void* calced_base = Alloc::external_pointer((void*)interior);
  if (calced_base != (void*)base)
    abort();
}

void check_external_pointer_large(size_t* base)
{
  size_t size = *base;
  char* curr = (char*)base;
  for (size_t offset = 0; offset < size; offset += 1 << 24)
  {
    check_offset(base, (void*)(curr + offset));
    check_offset(base, (void*)(curr + offset + (1 << 24) - 1));
  }
}

void test_external_pointer_large()
{
  xoroshiro::p128r64 r;

  auto* alloc = ThreadAlloc::get();

  constexpr size_t count_log = snmalloc::bits::is64() ? 5 : 3;
  constexpr size_t count = 1 << count_log;
  // Pre-allocate all the objects
  size_t* objects[count];

  for (size_t i = 0; i < count; i++)
  {
    size_t b = snmalloc::bits::is64() ? 28 : 26;
    size_t rand = r.next() & ((1 << b) - 1);
    size_t size = (1 << 24) + rand;
    // Store the object
    objects[i] = (size_t*)alloc->alloc(size);
    // Store the allocator's size for this object
    *objects[i] = Alloc::alloc_size(objects[i]);

    check_external_pointer_large(objects[i]);
    if (i > 0)
      check_external_pointer_large(objects[i - 1]);
  }

  for (size_t i = 0; i < count; i++)
  {
    check_external_pointer_large(objects[i]);
  }

  // Deallocate everything
  for (size_t i = 0; i < count; i++)
  {
    alloc->dealloc(objects[i]);
  }
}

void test_external_pointer_dealloc_bug()
{
  auto* alloc = ThreadAlloc::get();
  constexpr size_t count = (SUPERSLAB_SIZE / SLAB_SIZE) * 2;
  void* allocs[count];

  for (size_t i = 0; i < count; i++)
  {
    allocs[i] = alloc->alloc(SLAB_SIZE / 2);
  }

  for (size_t i = 1; i < count; i++)
  {
    alloc->dealloc(allocs[i]);
  }

  // Query external_pointer for every slot, including the ones that
  // have already been deallocated above.
  for (size_t i = 0; i < count; i++)
  {
    Alloc::external_pointer(allocs[i]);
  }

  alloc->dealloc(allocs[0]);
}

void test_alloc_16M()
{
  auto* alloc = ThreadAlloc::get();
  // Sizes >= 16M use large_alloc
  const size_t size = 16'000'000;

  void* p1 = alloc->alloc(size);
  assert(Alloc::alloc_size(Alloc::external_pointer(p1)) >= size);
  alloc->dealloc(p1);
}

void test_calloc_16M()
{
  auto* alloc = ThreadAlloc::get();
  // Sizes >= 16M use large_alloc
  const size_t size = 16'000'000;

  void* p1 = alloc->alloc<YesZero>(size);
  assert(Alloc::alloc_size(Alloc::external_pointer(p1)) >= size);
  alloc->dealloc(p1);
}

int main(int argc, char** argv)
{
#ifdef USE_SYSTEMATIC_TESTING
  opt::Opt opt(argc, argv);
  size_t seed = opt.is<size_t>("--seed", 0);
  Virtual::systematic_bump_ptr() += seed << 17;
#else
  UNUSED(argc);
  UNUSED(argv);
#endif

  test_external_pointer_dealloc_bug();
  test_external_pointer_large();
  test_alloc_dealloc_64k();
  test_random_allocation();
  test_calloc();
  test_double_alloc();
  test_external_pointer();
  test_alloc_16M();
  test_calloc_16M();

  return 0;
}

@@ -1,33 +0,0 @@
#include <snmalloc.h>

using namespace snmalloc;

// Check that, for every sizeclass, we round every offset within a
// superslab to the correct value, by comparing against the standard
// unoptimised version that uses division.
// Also check that the optimised multiple-of-sizeclass test agrees
// with the modulo computation.

int main(int argc, char** argv)
{
  UNUSED(argc);
  UNUSED(argv);

  for (size_t size_class = 0; size_class < NUM_SIZECLASSES; size_class++)
  {
    size_t rsize = sizeclass_to_size((uint8_t)size_class);
    for (size_t offset = 0; offset < SUPERSLAB_SIZE; offset++)
    {
      size_t rounded = (offset / rsize) * rsize;
      bool mod_0 = (offset % rsize) == 0;

      size_t opt_rounded = round_by_sizeclass(rsize, offset);
      if (rounded != opt_rounded)
        abort();

      bool opt_mod_0 = is_multiple_of_sizeclass(rsize, offset);
      if (opt_mod_0 != mod_0)
        abort();
    }
  }
  return 0;
}

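The optimised routines verified above avoid hardware division on the hot path. A sketch of the standard reciprocal-multiplication idea behind such rounding — illustrative only, with bounds chosen for clarity rather than mirroring snmalloc's actual constants, and relying on compiler __int128 support:

#include <cstdint>

// For any divisor 2 <= d < 2^32 and numerator n < 2^32:
// with m = floor(2^64 / d) + 1, ((n * m) >> 64) equals n / d.
uint64_t round_down(uint32_t n, uint32_t d)
{
  uint64_t m = UINT64_MAX / d + 1; // precomputable once per sizeclass
  uint64_t q = (uint64_t)(((unsigned __int128)n * m) >> 64);
  return q * d; // n rounded down to a multiple of d
}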
@@ -1,53 +0,0 @@
#include <iostream>
#include <snmalloc.h>

NOINLINE
uint8_t size_to_sizeclass(size_t size)
{
  return snmalloc::size_to_sizeclass(size);
}

int main(int, char**)
{
  bool failed = false;
  size_t size_low = 0;

  std::cout << "0 has sizeclass: " << (size_t)snmalloc::size_to_sizeclass(0)
            << std::endl;

  std::cout << "sizeclass |-> [size_low, size_high]" << std::endl;

  for (uint8_t sz = 0; sz < snmalloc::NUM_SIZECLASSES; sz++)
  {
    // Separate printing for small and medium sizeclasses
    if (sz == snmalloc::NUM_SMALL_CLASSES)
      std::cout << std::endl;

    size_t size = snmalloc::sizeclass_to_size(sz);
    std::cout << (size_t)sz << " |-> "
              << "[" << size_low + 1 << ", " << size << "]" << std::endl;

    if (size < size_low)
    {
      std::cout << "Sizeclass " << (size_t)sz << " is " << size
                << " which is less than " << size_low << std::endl;
      failed = true;
    }

    for (size_t i = size_low + 1; i <= size; i++)
    {
      if (size_to_sizeclass(i) != sz)
      {
        std::cout << "Size " << i << " has sizeclass "
                  << (size_t)size_to_sizeclass(i) << " but expected sizeclass "
                  << (size_t)sz << std::endl;
        failed = true;
      }
    }

    size_low = size;
  }

  if (failed)
    abort();
}

@@ -1,11 +0,0 @@
#undef IS_ADDRESS_SPACE_CONSTRAINED
#define OPEN_ENCLAVE
#define OPEN_ENCLAVE_SIMULATION
#define USE_RESERVE_MULTIPLE 1
#define NO_BOOTSTRAP_ALLOCATOR
#define IS_ADDRESS_SPACE_CONSTRAINED
#define SNMALLOC_EXPOSE_PAGEMAP
#define SNMALLOC_NAME_MANGLE(a) enclave_##a
// Redefine the namespace, so we can have two versions.
#define snmalloc snmalloc_enclave
#include "../../../override/malloc.cc"

@@ -1,7 +0,0 @@
#undef IS_ADDRESS_SPACE_CONSTRAINED
#define SNMALLOC_NAME_MANGLE(a) host_##a
#define NO_BOOTSTRAP_ALLOCATOR
#define SNMALLOC_EXPOSE_PAGEMAP
// Redefine the namespace, so we can have two versions.
#define snmalloc snmalloc_host
#include "../../../override/malloc.cc"

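These two wrappers compile override/malloc.cc twice — once per name prefix and namespace — so a single binary can host two independent snmalloc instances, which the test that follows links together. The same pattern extends to further instances; a hypothetical third translation unit would look like:

#define SNMALLOC_NAME_MANGLE(a) third_##a
#define NO_BOOTSTRAP_ALLOCATOR
#define SNMALLOC_EXPOSE_PAGEMAP
// Redefine the namespace, so we can have three versions.
#define snmalloc snmalloc_third
#include "../../../override/malloc.cc"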
@@ -1,63 +0,0 @@
#include "../../../snmalloc.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
void* oe_base;
|
||||
void* oe_end;
|
||||
extern "C" const void* __oe_get_heap_base()
|
||||
{
|
||||
return oe_base;
|
||||
}
|
||||
|
||||
extern "C" const void* __oe_get_heap_end()
|
||||
{
|
||||
return oe_end;
|
||||
}
|
||||
|
||||
extern "C" void* oe_memset(void* p, int c, size_t size)
|
||||
{
|
||||
return memset(p, c, size);
|
||||
}
|
||||
|
||||
extern "C" void oe_abort()
|
||||
{
|
||||
abort();
|
||||
}
|
||||
|
||||
extern "C" void* host_malloc(size_t);
|
||||
extern "C" void host_free(void*);
|
||||
|
||||
extern "C" void* enclave_malloc(size_t);
|
||||
extern "C" void enclave_free(void*);
|
||||
|
||||
extern "C" void*
|
||||
enclave_snmalloc_pagemap_global_get(snmalloc::PagemapConfig const**);
|
||||
extern "C" void*
|
||||
host_snmalloc_pagemap_global_get(snmalloc::PagemapConfig const**);
|
||||
|
||||
using namespace snmalloc;
|
||||
int main()
|
||||
{
|
||||
MemoryProviderStateMixin<DefaultPal> mp;
|
||||
|
||||
size_t size = 1ULL << 28;
|
||||
oe_base = mp.reserve<true>(&size, 1);
|
||||
oe_end = (uint8_t*)oe_base + size;
|
||||
std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;
|
||||
|
||||
// Call these functions to trigger asserts if the cast-to-self doesn't work.
|
||||
const PagemapConfig* c;
|
||||
enclave_snmalloc_pagemap_global_get(&c);
|
||||
host_snmalloc_pagemap_global_get(&c);
|
||||
|
||||
auto a = host_malloc(128);
|
||||
auto b = enclave_malloc(128);
|
||||
|
||||
std::cout << "Host alloc " << a << std::endl;
|
||||
std::cout << "Enclave alloc " << b << std::endl;
|
||||
|
||||
host_free(a);
|
||||
enclave_free(b);
|
||||
}
|
|
@@ -1,249 +0,0 @@
#pragma once

#ifdef USE_MEASURE
#  include "../ds/flaglock.h"

#  include <algorithm>
#  include <iomanip>
#  include <iostream>
#  include <limits>
#  include <utility>

#  define MEASURE_TIME_MARKERS(id, minbits, maxbits, markers) \
    static constexpr const char* const id##_time_markers[] = markers; \
    static histogram::Global<histogram::Histogram<uint64_t, minbits, maxbits>> \
      id##_time_global(#id, __FILE__, __LINE__, id##_time_markers); \
    static thread_local histogram::Histogram<uint64_t, minbits, maxbits> \
      id##_time_local(id##_time_global); \
    histogram::MeasureTime<histogram::Histogram<uint64_t, minbits, maxbits>> \
      id##_time(id##_time_local);

#  define MEASURE_TIME(id, minbits, maxbits) \
    MEASURE_TIME_MARKERS(id, minbits, maxbits, {nullptr})

#  define MARKERS(...) \
    { \
      __VA_ARGS__, nullptr \
    }

namespace histogram
{
  using namespace snmalloc;

  template<class H>
  class Global;

  template<
    class V,
    size_t LOW_BITS,
    size_t HIGH_BITS,
    size_t INTERMEDIATE_BITS = LOW_BITS>
  class Histogram
  {
  public:
    using This = Histogram<V, LOW_BITS, HIGH_BITS, INTERMEDIATE_BITS>;
    friend Global<This>;

    static_assert(LOW_BITS < HIGH_BITS, "LOW_BITS must be less than HIGH_BITS");

    static constexpr V LOW = (V)((size_t)1 << LOW_BITS);
    static constexpr V HIGH = (V)((size_t)1 << HIGH_BITS);
    static constexpr size_t BUCKETS =
      ((HIGH_BITS - LOW_BITS) << INTERMEDIATE_BITS) + 2;

  private:
    V high = (std::numeric_limits<V>::min)();
    size_t overflow = 0;
    size_t count[BUCKETS] = {};

    Global<This>* global;

  public:
    Histogram() : global(nullptr) {}
    Histogram(Global<This>& g) : global(&g) {}

    ~Histogram()
    {
      if (global != nullptr)
        global->add(*this);
    }

    void record(V value)
    {
      if (value > high)
        high = value;

      if (value >= HIGH)
      {
        overflow++;
      }
      else
      {
        auto i = get_index(value);
        assert(i < BUCKETS);
        count[i]++;
      }
    }

    V get_high()
    {
      return high;
    }

    size_t get_overflow()
    {
      return overflow;
    }

    size_t get_buckets()
    {
      return BUCKETS;
    }

    size_t get_count(size_t index)
    {
      if (index >= BUCKETS)
        return 0;

      return count[index];
    }

    static std::pair<V, V> get_range(size_t index)
    {
      if (index >= BUCKETS)
        return std::make_pair(HIGH, HIGH);

      if (index == 0)
        return std::make_pair(0, get_value(index));

      return std::make_pair(get_value(index - 1) + 1, get_value(index));
    }

    void add(This& that)
    {
      high = (std::max)(high, that.high);
      overflow += that.overflow;

      for (size_t i = 0; i < BUCKETS; i++)
        count[i] += that.count[i];
    }

    void print(std::ostream& o)
    {
      o << "\tHigh: " << high << std::endl
        << "\tOverflow: " << overflow << std::endl;

      size_t grand_total = overflow;
      for (size_t i = 0; i < BUCKETS; i++)
        grand_total += count[i];

      size_t old_percentage = 0;
      size_t cumulative_total = 0;
      for (size_t i = 0; i < BUCKETS; i++)
      {
        auto r = get_range(i);

        cumulative_total += count[i];

        o << "\t" << std::setfill(' ') << std::setw(6) << std::get<0>(r) << ".."
          << std::setfill(' ') << std::setw(6) << std::get<1>(r) << ": "
          << std::setfill(' ') << std::setw(10) << count[i];

        auto percentage = (cumulative_total * 100 / grand_total);
        if (percentage != old_percentage)
        {
          old_percentage = percentage;
          o << std::setfill(' ') << std::setw(20)
            << (cumulative_total * 100 / grand_total) << "%";
        }

        o << std::endl;
      }
    }

    static size_t get_index(V value)
    {
      return bits::to_exp_mant<INTERMEDIATE_BITS, LOW_BITS - INTERMEDIATE_BITS>(
        value);
    }

    static V get_value(size_t index)
    {
      return bits::
        from_exp_mant<INTERMEDIATE_BITS, LOW_BITS - INTERMEDIATE_BITS>(index);
    }
  };

  template<class H>
  class Global
  {
  private:
    const char* name;
    const char* file;
    size_t line;
    const char* const* markers;

    std::atomic_flag lock = ATOMIC_FLAG_INIT;
    H aggregate;

  public:
    Global(
      const char* name_,
      const char* file_,
      size_t line_,
      const char* const* markers)
    : name(name_), file(file_), line(line_), markers(markers)
    {}

    ~Global()
    {
      print();
    }

    void add(H& histogram)
    {
      FlagLock f(lock);
      aggregate.add(histogram);
    }

  private:
    void print()
    {
      std::cout << name;

      if (markers != nullptr)
      {
        std::cout << ": ";
        size_t i = 0;

        while (markers[i] != nullptr)
          std::cout << markers[i++] << " ";
      }

      std::cout << std::endl << file << ":" << line << std::endl;

      aggregate.print(std::cout);
    }
  };

  template<class H>
  class MeasureTime
  {
  private:
    H& histogram;
    uint64_t t;

  public:
    MeasureTime(H& histogram_) : histogram(histogram_)
    {
      t = bits::benchmark_time_start();
    }

    ~MeasureTime()
    {
      histogram.record(bits::benchmark_time_end() - t);
    }
  };
}

#else
#  define MEASURE_TIME(id, minbits, maxbits)
#  define MEASURE_TIME_MARKERS(id, minbits, maxbits, markers)
#endif

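A usage sketch for the macros above (effective only when compiled with USE_MEASURE): placing MEASURE_TIME at the top of a scope creates a MeasureTime RAII object that records the scope's duration into a thread-local histogram; per-thread histograms are merged into the global one when each thread exits, and the global prints at program exit.

void hot_path()
{
  // Buckets cover roughly 2^4 .. 2^30 timer ticks.
  MEASURE_TIME(hot_path, 4, 30);
  // ... code being timed ...
}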
@@ -1,14 +0,0 @@
#pragma once

#include <chrono>
#include <iomanip>
#include <iostream>

#define DO_TIME(name, code) \
  { \
    auto start__ = std::chrono::high_resolution_clock::now(); \
    code auto finish__ = std::chrono::high_resolution_clock::now(); \
    auto diff__ = finish__ - start__; \
    std::cout << name << ": " << std::setw(12) << diff__.count() << " ns" \
              << std::endl; \
  }

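DO_TIME is how the performance tests that follow report timings: it wraps a braced block and prints the elapsed clock ticks with the given label. A minimal sketch:

DO_TIME("zeroing 1MiB", {
  memset(buffer, 0, 1 << 20); // `buffer` assumed to exist in the enclosing scope
});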
@@ -1,91 +0,0 @@
#pragma once

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <type_traits>

namespace opt
{
  class Opt
  {
  private:
    int argc;
    char** argv;

  public:
    Opt(int argc, char** argv) : argc(argc), argv(argv) {}

    bool has(const char* opt)
    {
      for (int i = 1; i < argc; i++)
      {
        if (!strcmp(opt, argv[i]))
          return true;
      }

      return false;
    }

    template<class T>
    T is(const char* opt, T def)
    {
      size_t len = strlen(opt);

      for (int i = 1; i < argc; i++)
      {
        const char* p = param(opt, len, i);

        if (p != nullptr)
        {
          char* end = nullptr;
          T r;

          if (std::is_unsigned<T>::value)
            r = (T)strtoull(p, &end, 10);
          else
            r = (T)strtoll(p, &end, 10);

          if ((r == 0) && (end == p))
            return def;

          return r;
        }
      }

      return def;
    }

    const char* is(const char* opt, const char* def)
    {
      size_t len = strlen(opt);

      for (int i = 1; i < argc; i++)
      {
        const char* p = param(opt, len, i);

        if (p != nullptr)
          return p;
      }

      return def;
    }

  private:
    const char* param(const char* opt, size_t len, int i)
    {
      if (strncmp(opt, argv[i], len))
        return nullptr;

      switch (argv[i][len])
      {
        case '\0':
          return (i < (argc - 1)) ? argv[i + 1] : nullptr;
        case '=':
          return &argv[i][len + 1];
        default:
          return nullptr;
      }
    }
  };
}

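A usage sketch for opt::Opt, matching how the perf tests below consume it: both "--cores 4" and "--cores=4" are accepted for valued options, and has() matches bare flags.

int main(int argc, char** argv)
{
  opt::Opt opt(argc, argv);
  size_t cores = opt.is<size_t>("--cores", 8); // default used if absent or unparsable
  bool stats = opt.has("--stats"); // bare flag
  // ... use cores and stats ...
  return 0;
}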
@@ -1,176 +0,0 @@
#include "test/measuretime.h"
|
||||
#include "test/opt.h"
|
||||
#include "test/usage.h"
|
||||
#include "test/xoroshiro.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <snmalloc.h>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
using namespace snmalloc;
|
||||
|
||||
bool use_malloc = false;
|
||||
|
||||
template<void f(size_t id)>
|
||||
class ParallelTest
|
||||
{
|
||||
private:
|
||||
std::atomic<bool> flag = false;
|
||||
std::atomic<size_t> ready = 0;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
std::atomic<size_t> complete = 0;
|
||||
|
||||
size_t cores;
|
||||
|
||||
void run(size_t id)
|
||||
{
|
||||
auto prev = ready.fetch_add(1);
|
||||
if (prev + 1 == cores)
|
||||
{
|
||||
start = bits::tick();
|
||||
flag = true;
|
||||
}
|
||||
while (!flag)
|
||||
bits::pause();
|
||||
|
||||
f(id);
|
||||
|
||||
prev = complete.fetch_add(1);
|
||||
if (prev + 1 == cores)
|
||||
{
|
||||
end = bits::tick();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
ParallelTest(size_t cores) : cores(cores)
|
||||
{
|
||||
std::thread* t = new std::thread[cores];
|
||||
|
||||
for (size_t i = 0; i < cores; i++)
|
||||
{
|
||||
t[i] = std::thread(&ParallelTest::run, this, i);
|
||||
}
|
||||
// Wait for all the threads.
|
||||
for (size_t i = 0; i < cores; i++)
|
||||
{
|
||||
t[i].join();
|
||||
}
|
||||
|
||||
delete[] t;
|
||||
}
|
||||
|
||||
uint64_t time()
|
||||
{
|
||||
return end - start;
|
||||
}
|
||||
};
|
||||
|
||||
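ParallelTest releases all threads at once — the last thread to arrive sets the flag and takes the start timestamp — so time() measures only the contended region, not thread start-up. A minimal usage sketch:

void worker(size_t id)
{
  // per-thread work, keyed by the thread's index
}

void run_benchmark()
{
  ParallelTest<worker> test(8); // runs worker on 8 threads
  std::cout << "elapsed: " << test.time() << " ticks" << std::endl;
}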
std::atomic<size_t*>* contention;
size_t swapsize;
size_t swapcount;

void test_tasks_f(size_t id)
{
  Alloc* a = ThreadAlloc::get();
  xoroshiro::p128r32 r(id + 5000);

  for (size_t n = 0; n < swapcount; n++)
  {
    size_t size = 16 + (r.next() % 1024);
    size_t* res = (size_t*)(use_malloc ? malloc(size) : a->alloc(size));

    *res = size;
    size_t* out =
      contention[n % swapsize].exchange(res, std::memory_order_relaxed);

    if (out != nullptr)
    {
      size = *out;
      if (use_malloc)
        free(out);
      else
        a->dealloc(out, size);
    }
  }
}

void test_tasks(size_t num_tasks, size_t count, size_t size)
{
  Alloc* a = ThreadAlloc::get();

  contention = new std::atomic<size_t*>[size];
  xoroshiro::p128r32 r;

  for (size_t n = 0; n < size; n++)
  {
    size_t alloc_size = 16 + (r.next() % 1024);
    size_t* res =
      (size_t*)(use_malloc ? malloc(alloc_size) : a->alloc(alloc_size));
    *res = alloc_size;
    contention[n] = res;
  }
  swapcount = count;
  swapsize = size;

#ifdef USE_SNMALLOC_STATS
  Stats s0;
  current_alloc_pool()->aggregate_stats(s0);
#endif

  {
    ParallelTest<test_tasks_f> test(num_tasks);

    std::cout << "Task test, " << num_tasks << " threads, " << count
              << " swaps per thread " << test.time() << " ticks" << std::endl;

    for (size_t n = 0; n < swapsize; n++)
    {
      if (contention[n] != nullptr)
      {
        if (use_malloc)
          free(contention[n]);
        else
          a->dealloc(contention[n], *contention[n]);
      }
    }

    delete[] contention;
  }

#ifndef NDEBUG
  current_alloc_pool()->debug_check_empty();
#endif
}

int main(int argc, char** argv)
{
  opt::Opt opt(argc, argv);
  size_t cores = opt.is<size_t>("--cores", 8);

  size_t count = opt.is<size_t>("--swapcount", 1 << 20);
  size_t size = opt.is<size_t>("--swapsize", 1 << 18);
  use_malloc = opt.has("--use_malloc");

  std::cout << "Allocator is " << (use_malloc ? "System" : "snmalloc")
            << std::endl;

  for (size_t i = cores; i > 0; i >>= 1)
    test_tasks(i, count, size);

  if (opt.has("--stats"))
  {
#ifdef USE_SNMALLOC_STATS
    Stats s;
    current_alloc_pool()->aggregate_stats(s);
    s.print<Alloc>(std::cout);
#endif

    usage::print_memory();
  }

  return 0;
}

@@ -1,81 +0,0 @@
#include <snmalloc.h>
#include <test/measuretime.h>
#include <test/xoroshiro.h>

#include <unordered_set>

using namespace snmalloc;

namespace test
{
  static constexpr size_t count_log = 20;
  static constexpr size_t count = 1 << count_log;
  // Pre-allocate all the objects
  size_t* objects[count];

  NOINLINE void setup(xoroshiro::p128r64& r, Alloc* alloc)
  {
    for (size_t i = 0; i < count; i++)
    {
      size_t rand = (size_t)r.next();
      size_t offset = bits::clz(rand);
      if (offset > 30)
        offset = 30;
      size_t size = (rand & 15) << offset;
      if (size < 16)
        size = 16;
      // Store the object
      objects[i] = (size_t*)alloc->alloc(size);
      // Store the allocator's size for this object
      *objects[i] = Alloc::alloc_size(objects[i]);
    }
  }

  NOINLINE void teardown(Alloc* alloc)
  {
    // Deallocate everything
    for (size_t i = 0; i < count; i++)
    {
      alloc->dealloc(objects[i]);
    }

    current_alloc_pool()->debug_check_empty();
  }

  void test_external_pointer(xoroshiro::p128r64& r)
  {
    auto* alloc = ThreadAlloc::get();

    setup(r, alloc);

    DO_TIME("External pointer queries ", {
      for (size_t i = 0; i < 10000000; i++)
      {
        size_t rand = (size_t)r.next();
        size_t oid = rand & (((size_t)1 << count_log) - 1);
        size_t* external_ptr = objects[oid];
        size_t size = *external_ptr;
        size_t offset = (size >> 4) * (rand & 15);
        size_t interior_ptr = ((size_t)external_ptr) + offset;
        void* calced_external = Alloc::external_pointer((void*)interior_ptr);
        if (calced_external != external_ptr)
          abort();
      }
    });

    teardown(alloc);
  }
}

int main(int, char**)
{
  xoroshiro::p128r64 r;
#ifdef NDEBUG
  size_t nn = 30;
#else
  size_t nn = 3;
#endif

  for (size_t n = 0; n < nn; n++)
    test::test_external_pointer(r);
  return 0;
}

@@ -1,83 +0,0 @@
#include <snmalloc.h>
#include <test/measuretime.h>

#include <unordered_set>

using namespace snmalloc;

template<ZeroMem zero_mem>
void test_alloc_dealloc(size_t count, size_t size, bool write)
{
  auto* alloc = ThreadAlloc::get();

  DO_TIME(
    "Count: " << std::setw(6) << count << ", Size: " << std::setw(6) << size
              << ", ZeroMem: " << (zero_mem == YesZero) << ", Write: " << write,
    {
      std::unordered_set<void*> set;

      // Allocate 1.5x objects
      for (size_t i = 0; i < ((count * 3) / 2); i++)
      {
        void* p = alloc->alloc<zero_mem>(size);
        assert(set.find(p) == set.end());

        if (write)
          *(int*)p = 4;

        set.insert(p);
      }

      // Free 0.25x of the objects
      for (size_t i = 0; i < (count / 4); i++)
      {
        auto it = set.begin();
        void* p = *it;
        alloc->dealloc(p, size);
        set.erase(it);
        assert(set.find(p) == set.end());
      }

      // Allocate 1x objects
      for (size_t i = 0; i < count; i++)
      {
        void* p = alloc->alloc<zero_mem>(size);
        assert(set.find(p) == set.end());

        if (write)
          *(int*)p = 4;

        set.insert(p);
      }

      // Free everything
      while (!set.empty())
      {
        auto it = set.begin();
        alloc->dealloc(*it, size);
        set.erase(it);
      }
    });

  current_alloc_pool()->debug_check_empty();
}

int main(int, char**)
{
  for (size_t size = 16; size <= 128; size <<= 1)
  {
    test_alloc_dealloc<NoZero>(1 << 15, size, false);
    test_alloc_dealloc<NoZero>(1 << 15, size, true);
    test_alloc_dealloc<YesZero>(1 << 15, size, false);
    test_alloc_dealloc<YesZero>(1 << 15, size, true);
  }

  for (size_t size = 1 << 12; size <= 1 << 17; size <<= 1)
  {
    test_alloc_dealloc<NoZero>(1 << 10, size, false);
    test_alloc_dealloc<NoZero>(1 << 10, size, true);
    test_alloc_dealloc<YesZero>(1 << 10, size, false);
    test_alloc_dealloc<YesZero>(1 << 10, size, true);
  }

  return 0;
}

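Note the second argument to dealloc throughout these tests: snmalloc offers a sized-deallocation path where the caller vouches for the allocation size, sparing a metadata lookup. Both forms appear in the files above; a minimal sketch:

auto* alloc = snmalloc::ThreadAlloc::get();
void* p = alloc->alloc(48);
alloc->dealloc(p, 48); // sized: caller supplies the original size

void* q = alloc->alloc(48);
alloc->dealloc(q); // unsized: the allocator recovers the size itself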
@@ -1,42 +0,0 @@
#pragma once

#if defined(_WIN32)
#  define WIN32_LEAN_AND_MEAN
#  define NOMINMAX
#  include <windows.h>
// Needs to be included after windows.h
#  include <psapi.h>
#endif

#include <iomanip>
#include <iostream>

namespace usage
{
  void print_memory()
  {
#if defined(_WIN32)
    PROCESS_MEMORY_COUNTERS_EX pmc;

    if (!GetProcessMemoryInfo(
          GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
      return;

    std::cout << "Memory info:" << std::endl
              << "\tPageFaultCount: " << pmc.PageFaultCount << std::endl
              << "\tPeakWorkingSetSize: " << pmc.PeakWorkingSetSize << std::endl
              << "\tWorkingSetSize: " << pmc.WorkingSetSize << std::endl
              << "\tQuotaPeakPagedPoolUsage: " << pmc.QuotaPeakPagedPoolUsage
              << std::endl
              << "\tQuotaPagedPoolUsage: " << pmc.QuotaPagedPoolUsage
              << std::endl
              << "\tQuotaPeakNonPagedPoolUsage: "
              << pmc.QuotaPeakNonPagedPoolUsage << std::endl
              << "\tQuotaNonPagedPoolUsage: " << pmc.QuotaNonPagedPoolUsage
              << std::endl
              << "\tPagefileUsage: " << pmc.PagefileUsage << std::endl
              << "\tPeakPagefileUsage: " << pmc.PeakPagefileUsage << std::endl
              << "\tPrivateUsage: " << pmc.PrivateUsage << std::endl;
#endif
  }
}

@@ -1,71 +0,0 @@
#pragma once

#include <cstdint>
#include <cstdlib>

namespace xoroshiro
{
  namespace detail
  {
    template<typename STATE, typename RESULT, STATE A, STATE B, STATE C>
    class XorOshiro
    {
    private:
      static constexpr unsigned STATE_BITS = 8 * sizeof(STATE);
      static constexpr unsigned RESULT_BITS = 8 * sizeof(RESULT);

      static_assert(
        STATE_BITS >= RESULT_BITS,
        "STATE must have at least as many bits as RESULT");

      STATE x;
      STATE y;

      static inline STATE rotl(STATE x, STATE k)
      {
        return (x << k) | (x >> (STATE_BITS - k));
      }

    public:
      XorOshiro(STATE x_ = 5489, STATE y_ = 0) : x(x_), y(y_)
      {
        // If both state words are zero, the generator is stuck at zero.
        if (x_ == 0 && y_ == 0)
          abort();

        next();
      }

      void set_state(STATE x_, STATE y_ = 0)
      {
        // If both state words are zero, the generator is stuck at zero.
        if (x_ == 0 && y_ == 0)
          abort();

        x = x_;
        y = y_;
        next();
      }

      RESULT next()
      {
        STATE r = x + y;
        y ^= x;
        x = rotl(x, A) ^ y ^ (y << B);
        y = rotl(y, C);
        // If both state words are zero, the generator is stuck at zero.
        if (x == 0 && y == 0)
          abort();
        return r >> (STATE_BITS - RESULT_BITS);
      }
    };
  }

  using p128r64 = detail::XorOshiro<uint64_t, uint64_t, 55, 14, 36>;
  using p128r32 = detail::XorOshiro<uint64_t, uint32_t, 55, 14, 36>;
  using p64r32 = detail::XorOshiro<uint32_t, uint32_t, 27, 7, 20>;
  using p64r16 = detail::XorOshiro<uint32_t, uint16_t, 27, 7, 20>;
  using p32r16 = detail::XorOshiro<uint16_t, uint16_t, 13, 5, 10>;
  using p32r8 = detail::XorOshiro<uint16_t, uint8_t, 13, 5, 10>;
  using p16r8 = detail::XorOshiro<uint8_t, uint8_t, 4, 7, 3>;
}

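These generators are deterministic: a given seed always yields the same stream, which is what keeps the randomised tests above reproducible. A minimal sketch:

#include <cassert>
#include <test/xoroshiro.h>

void example()
{
  xoroshiro::p128r32 a(42), b(42); // same seed
  assert(a.next() == b.next()); // identical streams

  b.set_state(42); // re-seeding restores the start of the stream
}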
@@ -90,15 +90,6 @@
      }
    }
  },
  {
    "component": {
      "type": "git",
      "git": {
        "repositoryUrl": "https://github.com/microsoft/snmalloc",
        "commitHash": "9ca436c951d635bface96a0b9d73262ab4cb088e"
      }
    }
  },
  {
    "Component": {
      "Type": "other",

@@ -85,19 +85,14 @@ if("virtual" IN_LIST TARGET)
set_property(TARGET libbyz.host PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(libbyz.host PRIVATE SYSTEM ${EVERCRYPT_INC})

set(SNMALLOC_ONLY_HEADER_LIBRARY ON)
add_subdirectory(${CMAKE_SOURCE_DIR}/3rdparty/snmalloc EXCLUDE_FROM_ALL)

add_library(libcommontest STATIC
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/network_udp.cpp
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/network_udp_mt.cpp
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/ITimer.cpp
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/Time.cpp
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/Statistics.cpp
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/snmalloc.cpp
)
target_compile_options(libcommontest PRIVATE -stdlib=libc++)
target_link_libraries(libcommontest PRIVATE snmalloc_lib)

target_include_directories(libcommontest PRIVATE
  ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz

@@ -1,5 +0,0 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#define NO_BOOTSTRAP_ALLOCATOR

#include "snmalloc/src/override/malloc.cc"