snmalloc is unused at the moment, it will be picked up again through openenclave once it has been added there (#563)
This commit is contained in:
Amaury Chamayou 2019-11-15 11:03:20 +00:00 committed by GitHub
Parent 4aa499e7d4
Commit e66f5ce7b3
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
61 changed files: 0 additions and 8294 deletions

178
3rdparty/snmalloc/CMakeLists.txt (vendored)

@@ -1,178 +0,0 @@
cmake_minimum_required(VERSION 3.8)
project(snmalloc C CXX)
option(USE_SNMALLOC_STATS "Track allocation stats" OFF)
option(USE_MEASURE "Measure performance with histograms" OFF)
option(EXPOSE_EXTERNAL_PAGEMAP "Expose the global pagemap" OFF)
option(EXPOSE_EXTERNAL_RESERVE "Expose an interface to reserve memory using the default memory provider" OFF)
set(CACHE_FRIENDLY_OFFSET OFF CACHE STRING "Base offset to place linked-list nodes.")
# Provide as macro so other projects can reuse
macro(warnings_high)
if(MSVC)
# Force to always compile with W4
if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
add_compile_options(/WX /wd4127 /wd4324 /wd4201)
else()
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wsign-conversion)
endif ()
add_compile_options(-Wall -Wextra -Werror)
endif()
endmacro()
# The main target for snmalloc
add_library(snmalloc_lib INTERFACE)
target_include_directories(snmalloc_lib INTERFACE src/)
if(NOT MSVC)
find_package(Threads REQUIRED COMPONENTS snmalloc_lib)
target_link_libraries(snmalloc_lib INTERFACE ${CMAKE_THREAD_LIBS_INIT})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
target_link_libraries(snmalloc_lib INTERFACE atomic)
endif()
target_compile_options(snmalloc_lib INTERFACE -mcx16)
else()
set(WIN8COMPAT FALSE CACHE BOOL "Avoid Windows 10 APIs")
if (WIN8COMPAT)
target_compile_definitions(snmalloc_lib INTERFACE -DWINVER=0x0603)
message(STATUS "snmalloc: Avoiding Windows 10 APIs")
else()
message(STATUS "snmalloc: Using Windows 10 APIs")
# VirtualAlloc2 is exposed by mincore.lib, not Kernel32.lib (as the
# documentation says)
target_link_libraries(snmalloc_lib INTERFACE mincore)
endif()
endif()
# Have to set this globally, as can't be set on an interface target.
set(CMAKE_CXX_STANDARD 17)
if(USE_SNMALLOC_STATS)
target_compile_definitions(snmalloc_lib INTERFACE -DUSE_SNMALLOC_STATS)
endif()
if(USE_MEASURE)
target_compile_definitions(snmalloc_lib INTERFACE -DUSE_MEASURE)
endif()
# To build with just the header library target define SNMALLOC_ONLY_HEADER_LIBRARY
# in containing Cmake file.
if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)
warnings_high()
if(MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG")
else()
add_compile_options(-march=native -fno-exceptions -fno-rtti -g)
endif()
macro(subdirlist result curdir)
file(GLOB children LIST_DIRECTORIES true RELATIVE ${curdir} ${curdir}/*)
set(dirlist "")
foreach(child ${children})
if(IS_DIRECTORY ${curdir}/${child})
list(APPEND dirlist ${child})
endif()
endforeach()
set(${result} ${dirlist})
endmacro()
macro(add_shim name)
add_library(${name} SHARED src/override/malloc.cc)
target_link_libraries(${name} snmalloc_lib)
target_compile_definitions(${name} PRIVATE "SNMALLOC_EXPORT=__attribute__((visibility(\"default\")))")
set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden)
if(CACHE_FRIENDLY_OFFSET)
target_compile_definitions(${name} PRIVATE -DCACHE_FRIENDLY_OFFSET=${CACHE_FRIENDLY_OFFSET})
endif()
if(EXPOSE_EXTERNAL_PAGEMAP)
target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_PAGEMAP)
endif()
if(EXPOSE_EXTERNAL_RESERVE)
target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_RESERVE)
endif()
endmacro()
if(NOT MSVC)
add_shim(snmallocshim)
add_shim(snmallocshim-1mib)
target_compile_definitions(snmallocshim-1mib PRIVATE IS_ADDRESS_SPACE_CONSTRAINED)
endif()
enable_testing()
set(TESTDIR ${CMAKE_CURRENT_SOURCE_DIR}/src/test)
subdirlist(TEST_CATEGORIES ${TESTDIR})
list(REVERSE TEST_CATEGORIES)
foreach(TEST_CATEGORY ${TEST_CATEGORIES})
subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY})
foreach(TEST ${TESTS})
foreach(SUPER_SLAB_SIZE 1;16)
unset(SRC)
aux_source_directory(${TESTDIR}/${TEST_CATEGORY}/${TEST} SRC)
set(TESTNAME "${TEST_CATEGORY}-${TEST}-${SUPER_SLAB_SIZE}")
add_executable(${TESTNAME} ${SRC} src/override/new.cc)
if (${SUPER_SLAB_SIZE} EQUAL 1)
target_compile_definitions(${TESTNAME} PRIVATE IS_ADDRESS_SPACE_CONSTRAINED)
endif()
target_include_directories(${TESTNAME} PRIVATE src)
target_link_libraries(${TESTNAME} snmalloc_lib)
if (${TEST} MATCHES "release-.*")
message(STATUS "Adding test: ${TESTNAME} only for release configs")
add_test(NAME ${TESTNAME} COMMAND ${TESTNAME} CONFIGURATIONS "Release")
else()
message(STATUS "Adding test: ${TESTNAME}")
add_test(${TESTNAME} ${TESTNAME})
endif()
if (${TEST_CATEGORY} MATCHES "perf")
set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 3)
endif()
endforeach()
endforeach()
endforeach()
# The clang-format tool is installed under a variety of different names. Try
# to find a sensible one. Only look for 6.0 and 7.0 versions explicitly - we
# don't know whether our clang-format file will work with newer versions of the
# tool
set(CLANG_FORMAT_NAMES
clang-format-7.0
clang-format-6.0
clang-format70
clang-format60
clang-format)
# Loop over each of the possible names of clang-format and try to find one.
set(CLANG_FORMAT CLANG_FORMAT-NOTFOUND)
foreach (NAME IN ITEMS ${CLANG_FORMAT_NAMES})
if (${CLANG_FORMAT} STREQUAL "CLANG_FORMAT-NOTFOUND")
find_program(CLANG_FORMAT ${NAME})
endif ()
endforeach()
# If we've found a clang-format tool, generate a target for it, otherwise emit
# a warning.
if (${CLANG_FORMAT} STREQUAL "CLANG_FORMAT-NOTFOUND")
message(WARNING "Not generating clangformat target, no clang-format tool found")
else ()
message(STATUS "Generating clangformat target using ${CLANG_FORMAT}")
file(GLOB_RECURSE ALL_SOURCE_FILES *.cc *.h *.hh)
add_custom_target(
clangformat
COMMAND ${CLANG_FORMAT}
-i
${ALL_SOURCE_FILES})
endif()
endif()
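The add_shim macro above builds each override shim with hidden symbol visibility and re-exposes only the entry points tagged with SNMALLOC_EXPORT. A minimal C++ sketch of that visibility pattern (illustrative only; shim_alloc is a hypothetical name and this is not the content of src/override/malloc.cc):

#include <cstddef>
#include <cstdlib>

// add_shim passes SNMALLOC_EXPORT on the compiler command line; shown inline here.
#ifndef SNMALLOC_EXPORT
#  define SNMALLOC_EXPORT __attribute__((visibility("default")))
#endif

// With CXX_VISIBILITY_PRESET hidden, only symbols tagged SNMALLOC_EXPORT
// remain visible in the shared object produced by add_shim.
extern "C" SNMALLOC_EXPORT void* shim_alloc(std::size_t size)
{
  return std::malloc(size); // stand-in body, for illustration only
}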

21
3rdparty/snmalloc/LICENSE (vendored)

@@ -1,21 +0,0 @@
MIT License
Copyright (c) Microsoft Corporation. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

113
3rdparty/snmalloc/src/ds/aba.h (vendored)

@@ -1,113 +0,0 @@
#pragma once
#include "bits.h"
namespace snmalloc
{
template<typename T, Construction c = RequiresInit>
class ABA
{
public:
#ifdef PLATFORM_IS_X86
struct alignas(2 * sizeof(std::size_t)) Linked
{
T* ptr;
uintptr_t aba;
};
struct Independent
{
std::atomic<T*> ptr;
std::atomic<uintptr_t> aba;
};
static_assert(
sizeof(Linked) == sizeof(Independent),
"Expecting identical struct sizes in union");
static_assert(
sizeof(Linked) == (2 * sizeof(std::size_t)),
"Expecting ABA to be the size of two pointers");
using Cmp = Linked;
#else
using Cmp = T*;
#endif
private:
#ifdef PLATFORM_IS_X86
union
{
alignas(2 * sizeof(std::size_t)) std::atomic<Linked> linked;
Independent independent;
};
#else
std::atomic<T*> ptr;
#endif
public:
ABA()
{
if constexpr (c == RequiresInit)
init(nullptr);
}
void init(T* x)
{
#ifdef PLATFORM_IS_X86
independent.ptr.store(x, std::memory_order_relaxed);
independent.aba.store(0, std::memory_order_relaxed);
#else
ptr.store(x, std::memory_order_relaxed);
#endif
}
T* peek()
{
return independent.ptr.load(std::memory_order_relaxed);
}
Cmp read()
{
return
#ifdef PLATFORM_IS_X86
Cmp{independent.ptr.load(std::memory_order_relaxed),
independent.aba.load(std::memory_order_relaxed)};
#else
ptr.load(std::memory_order_relaxed);
#endif
}
static T* load(Cmp& from)
{
#ifdef PLATFORM_IS_X86
return from.ptr;
#else
return from;
#endif
}
bool compare_exchange(Cmp& expect, T* value)
{
#ifdef PLATFORM_IS_X86
# if defined(_MSC_VER) && defined(PLATFORM_BITS_64)
return _InterlockedCompareExchange128(
(volatile __int64*)&linked,
expect.aba + 1,
(__int64)value,
(__int64*)&expect);
# else
# if defined(__GNUC__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
#error You must compile with -mcx16 to enable 16-byte atomic compare and swap.
# endif
Cmp xchg{value, expect.aba + 1};
return linked.compare_exchange_weak(
expect, xchg, std::memory_order_relaxed, std::memory_order_relaxed);
# endif
#else
return ptr.compare_exchange_weak(
expect, value, std::memory_order_relaxed, std::memory_order_relaxed);
#endif
}
};
} // namespace snmalloc
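A minimal sketch of the read()/load()/compare_exchange() protocol the ABA wrapper above exposes, pushing onto a Treiber-style stack (illustrative only; Node is a hypothetical type, and the deleted src/ directory is assumed to be on the include path on an x86 or otherwise supported target):

#include "ds/aba.h"

struct Node
{
  Node* next = nullptr;
};

// Retry loop: snapshot the head (pointer plus ABA counter on x86), link the
// new node in front of it, and publish with a compare-and-swap.
inline void push(snmalloc::ABA<Node>& head, Node* item)
{
  auto cmp = head.read();
  do
  {
    item->next = snmalloc::ABA<Node>::load(cmp);
  } while (!head.compare_exchange(cmp, item));
}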

42
3rdparty/snmalloc/src/ds/address.h (vendored)

@@ -1,42 +0,0 @@
#pragma once
#include <cstdint>
namespace snmalloc
{
/**
* The type used for an address. Currently, all addresses are assumed to be
* provenance-carrying values and so it is possible to cast back from the
* result of arithmetic on an address_t. Eventually, this will want to be
* separated into two types, one for raw addresses and one for addresses that
* can be cast back to pointers.
*/
using address_t = uintptr_t;
/**
* Perform pointer arithmetic and return the adjusted pointer.
*/
template<typename T>
inline T* pointer_offset(T* base, size_t diff)
{
return reinterpret_cast<T*>(reinterpret_cast<char*>(base) + diff);
}
/**
* Cast from a pointer type to an address.
*/
template<typename T>
inline address_t address_cast(T* ptr)
{
return reinterpret_cast<address_t>(ptr);
}
/**
* Cast from an address back to a pointer of the specified type. All uses of
* this will eventually need auditing for CHERI compatibility.
*/
template<typename T>
inline T* pointer_cast(address_t address)
{
return reinterpret_cast<T*>(address);
}
} // namespace snmalloc
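A small demonstration of the helpers above: pointer_offset works in raw bytes, and address_cast/pointer_cast round-trip a pointer through address_t (illustrative only; assumes the deleted src/ directory is on the include path):

#include "ds/address.h"
#include <cassert>

inline void address_demo()
{
  int buffer[4] = {0, 1, 2, 3};

  // pointer_offset advances by a byte count, so step over one int explicitly.
  int* second = snmalloc::pointer_offset(buffer, sizeof(int));
  assert(*second == 1);

  // Round-trip through the integer address type.
  snmalloc::address_t a = snmalloc::address_cast(buffer);
  assert(snmalloc::pointer_cast<int>(a) == buffer);
}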

477
3rdparty/snmalloc/src/ds/bits.h (vendored)

@@ -1,477 +0,0 @@
#pragma once
#include <limits>
#include <stddef.h>
#ifdef _MSC_VER
# include <immintrin.h>
# include <intrin.h>
# define ALWAYSINLINE __forceinline
# define NOINLINE __declspec(noinline)
# define HEADER_GLOBAL __declspec(selectany)
# define likely(x) !!(x)
# define unlikely(x) !!(x)
#else
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
# include <cpuid.h>
# include <emmintrin.h>
# define ALWAYSINLINE __attribute__((always_inline))
# define NOINLINE __attribute__((noinline))
# ifdef __clang__
# define HEADER_GLOBAL __attribute__((selectany))
# else
// GCC does not support selectany, weak is almost the correct
// attribute, but leaves the global variable preemptible.
# define HEADER_GLOBAL __attribute__((weak))
# endif
#endif
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \
defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \
defined(_M_AMD64)
# define PLATFORM_IS_X86
# if defined(__linux__) && !defined(OPEN_ENCLAVE)
# include <x86intrin.h>
# endif
# if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \
defined(_M_AMD64)
# define PLATFORM_BITS_64
# else
# define PLATFORM_BITS_32
# endif
#endif
#if defined(_MSC_VER) && defined(PLATFORM_BITS_32)
# include <intsafe.h>
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
#define UNUSED(x) ((void)(x))
#if __has_builtin(__builtin_assume)
# define SNMALLOC_ASSUME(x) __builtin_assume(x)
#else
# define SNMALLOC_ASSUME(x) \
do \
{ \
} while (0)
#endif
// #define USE_LZCNT
#include "address.h"
#include <atomic>
#include <cassert>
#include <cstdint>
#include <type_traits>
#ifdef pause
# undef pause
#endif
namespace snmalloc
{
// Used to enable trivial constructors for
// class that zero init is sufficient.
// Supplying PreZeroed means the memory is pre-zeroed i.e. a global section
// RequiresInit is if the class needs to zero its fields.
enum Construction
{
PreZeroed,
RequiresInit
};
namespace bits
{
static constexpr size_t BITS = sizeof(size_t) * 8;
static constexpr bool is64()
{
return BITS == 64;
}
/**
* Returns a value of type T that has a single bit set,
*
* S is a template parameter because callers use either `int` or `size_t`
* and either is valid to represent a number in the range 0-63 (or 0-127 if
* we want to use `__uint128_t` as `T`).
*/
template<typename T = size_t, typename S>
constexpr T one_at_bit(S shift)
{
static_assert(std::is_integral_v<T>, "Type must be integral");
return (static_cast<T>(1)) << shift;
}
static constexpr size_t ADDRESS_BITS = is64() ? 48 : 32;
inline void pause()
{
#if defined(PLATFORM_IS_X86)
_mm_pause();
#else
# warning "Missing pause intrinsic"
#endif
}
inline uint64_t tick()
{
#if defined(PLATFORM_IS_X86)
# if defined(_MSC_VER)
return __rdtsc();
# elif defined(__clang__)
return __builtin_readcyclecounter();
# else
return __builtin_ia32_rdtsc();
# endif
#else
# error Define CPU tick for this platform
#endif
}
inline uint64_t tickp()
{
#if defined(PLATFORM_IS_X86)
# if defined(_MSC_VER)
unsigned int aux;
return __rdtscp(&aux);
# else
unsigned aux;
return __builtin_ia32_rdtscp(&aux);
# endif
#else
# error Define CPU tick for this platform
#endif
}
inline void halt_out_of_order()
{
#if defined(PLATFORM_IS_X86)
# if defined(_MSC_VER)
int cpu_info[4];
__cpuid(cpu_info, 0);
# else
unsigned int eax, ebx, ecx, edx;
__get_cpuid(0, &eax, &ebx, &ecx, &edx);
# endif
#else
# error Define CPU benchmark start time for this platform
#endif
}
inline uint64_t benchmark_time_start()
{
halt_out_of_order();
return tick();
}
inline uint64_t benchmark_time_end()
{
uint64_t t = tickp();
halt_out_of_order();
return t;
}
inline size_t clz(size_t x)
{
#if defined(_MSC_VER)
# ifdef USE_LZCNT
# ifdef PLATFORM_BITS_64
return __lzcnt64(x);
# else
return __lzcnt((uint32_t)x);
# endif
# else
unsigned long index;
# ifdef PLATFORM_BITS_64
_BitScanReverse64(&index, x);
# else
_BitScanReverse(&index, (unsigned long)x);
# endif
return BITS - index - 1;
# endif
#else
return static_cast<size_t>(__builtin_clzl(x));
#endif
}
inline constexpr size_t rotr_const(size_t x, size_t n)
{
size_t nn = n & (BITS - 1);
return (x >> nn) |
(x << ((static_cast<size_t>(-static_cast<int>(nn))) & (BITS - 1)));
}
inline constexpr size_t rotl_const(size_t x, size_t n)
{
size_t nn = n & (BITS - 1);
return (x << nn) |
(x >> ((static_cast<size_t>(-static_cast<int>(nn))) & (BITS - 1)));
}
inline size_t rotr(size_t x, size_t n)
{
#if defined(_MSC_VER)
# ifdef PLATFORM_BITS_64
return _rotr64(x, (int)n);
# else
return _rotr((uint32_t)x, (int)n);
# endif
#else
return rotr_const(x, n);
#endif
}
inline size_t rotl(size_t x, size_t n)
{
#if defined(_MSC_VER)
# ifdef PLATFORM_BITS_64
return _rotl64(x, (int)n);
# else
return _rotl((uint32_t)x, (int)n);
# endif
#else
return rotl_const(x, n);
#endif
}
constexpr size_t clz_const(size_t x)
{
size_t n = 0;
for (int i = BITS - 1; i >= 0; i--)
{
size_t mask = one_at_bit(i);
if ((x & mask) == mask)
return n;
n++;
}
return n;
}
inline size_t ctz(size_t x)
{
#if defined(_MSC_VER)
# ifdef PLATFORM_BITS_64
return _tzcnt_u64(x);
# else
return _tzcnt_u32((uint32_t)x);
# endif
#else
return static_cast<size_t>(__builtin_ctzl(x));
#endif
}
constexpr size_t ctz_const(size_t x)
{
size_t n = 0;
for (size_t i = 0; i < BITS; i++)
{
size_t mask = one_at_bit(i);
if ((x & mask) == mask)
return n;
n++;
}
return n;
}
inline size_t umul(size_t x, size_t y, bool& overflow)
{
#if __has_builtin(__builtin_mul_overflow)
size_t prod;
overflow = __builtin_mul_overflow(x, y, &prod);
return prod;
#elif defined(_MSC_VER)
# if defined(PLATFORM_BITS_64)
size_t high_prod;
size_t prod = _umul128(x, y, &high_prod);
overflow = high_prod != 0;
return prod;
# else
size_t prod;
overflow = S_OK != UIntMult(x, y, &prod);
return prod;
# endif
#else
size_t prod = x * y;
overflow = y && (x > ((size_t)-1 / y));
return prod;
#endif
}
inline size_t next_pow2(size_t x)
{
// Correct for numbers [0..MAX_SIZE >> 1).
// Returns 1 for x > (MAX_SIZE >> 1).
if (x <= 2)
return x;
return one_at_bit(BITS - clz(x - 1));
}
inline size_t next_pow2_bits(size_t x)
{
// Correct for numbers [1..MAX_SIZE].
// Returns 64 for 0. Approximately 2 cycles.
return BITS - clz(x - 1);
}
constexpr size_t next_pow2_const(size_t x)
{
if (x <= 2)
return x;
return one_at_bit(BITS - clz_const(x - 1));
}
constexpr size_t next_pow2_bits_const(size_t x)
{
return BITS - clz_const(x - 1);
}
static inline size_t align_down(size_t value, size_t alignment)
{
assert(next_pow2(alignment) == alignment);
size_t align_1 = alignment - 1;
value &= ~align_1;
return value;
}
static inline size_t align_up(size_t value, size_t alignment)
{
assert(next_pow2(alignment) == alignment);
size_t align_1 = alignment - 1;
value += align_1;
value &= ~align_1;
return value;
}
template<size_t alignment>
static inline bool is_aligned_block(void* p, size_t size)
{
assert(next_pow2(alignment) == alignment);
return ((static_cast<size_t>(address_cast(p)) | size) &
(alignment - 1)) == 0;
}
/************************************************
*
* Map large range of strictly positive integers
* into an exponent and mantissa pair.
*
* The reverse mapping is given by first adding one to the value, and then
* extracting the bottom MANTISSA bits as m, and the rest as e.
* Then each value maps as:
*
* e | m | value
* ---------------------------------
* 0 | x1 ... xm | 0..00 x1 .. xm
* 1 | x1 ... xm | 0..01 x1 .. xm
* 2 | x1 ... xm | 0..1 x1 .. xm 0
* 3 | x1 ... xm | 0.1 x1 .. xm 00
*
* The forward mapping maps a value to the
* smallest exponent and mantissa with a
* reverse mapping not less than the value.
*
* The e and m in the forward mapping and reverse are not the same, and the
* initial increment in from_exp_mant and the decrement in to_exp_mant
* handle the different ways it is calculating and using the split.
* This is due to the rounding of bits below the mantissa in the
* representation, which is confusing but leads to the fastest code.
*
* Does not work for value=0.
***********************************************/
template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
static size_t to_exp_mant(size_t value)
{
size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1;
size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;
value = value - 1;
size_t e =
bits::BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT);
size_t b = (e == 0) ? 0 : 1;
size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;
return (e << MANTISSA_BITS) + m;
}
template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
constexpr static size_t to_exp_mant_const(size_t value)
{
size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1;
size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;
value = value - 1;
size_t e =
bits::BITS - MANTISSA_BITS - LOW_BITS - clz_const(value | LEADING_BIT);
size_t b = (e == 0) ? 0 : 1;
size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;
return (e << MANTISSA_BITS) + m;
}
template<size_t MANTISSA_BITS, size_t LOW_BITS = 0>
constexpr static size_t from_exp_mant(size_t m_e)
{
if (MANTISSA_BITS > 0)
{
m_e = m_e + 1;
size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1;
size_t m = m_e & MANTISSA_MASK;
size_t e = m_e >> MANTISSA_BITS;
size_t b = e == 0 ? 0 : 1;
size_t shifted_e = e - b;
size_t extended_m = (m + (b << MANTISSA_BITS));
return extended_m << (shifted_e + LOW_BITS);
}
return one_at_bit(m_e + LOW_BITS);
}
/**
* Implementation of `std::min`
*
* `std::min` is in `<algorithm>`, so pulls in a lot of unnecessary code
* We write our own to reduce the code that potentially needs reviewing.
**/
template<typename T>
constexpr inline T min(T t1, T t2)
{
return t1 < t2 ? t1 : t2;
}
/**
* Implementation of `std::max`
*
* `std::max` is in `<algorithm>`, so pulls in a lot of unnecessary code
* We write our own to reduce the code that potentially needs reviewing.
**/
template<typename T>
constexpr inline T max(T t1, T t2)
{
return t1 > t2 ? t1 : t2;
}
} // namespace bits
} // namespace snmalloc
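The exponent/mantissa mapping documented above is what drives size-class bucketing, and the power-of-two helpers feed the layout constants in allocconfig.h later in this commit. A few compile-time spot checks (illustrative only; assumes the deleted src/ directory is on the include path and an x86 target, as the header itself requires):

#include "ds/bits.h"

static_assert(snmalloc::bits::next_pow2_const(33) == 64, "rounds up");
static_assert(snmalloc::bits::next_pow2_bits_const(64) == 6, "log2 of 64");

// With 2 mantissa bits, for example, the representable sizes near 16 are
// 16, 20, 24, 28, 32, ... so 24 maps exactly and 17 rounds up to 20.
static_assert(
  snmalloc::bits::from_exp_mant<2>(snmalloc::bits::to_exp_mant_const<2>(24)) == 24,
  "24 is exactly representable");
static_assert(
  snmalloc::bits::from_exp_mant<2>(snmalloc::bits::to_exp_mant_const<2>(17)) == 20,
  "17 rounds up to the 20 bucket");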

55
3rdparty/snmalloc/src/ds/csv.h (vendored)

@@ -1,55 +0,0 @@
#pragma once
#include <iostream>
#include <string>
namespace snmalloc
{
class CSVStream
{
private:
std::ostream* out;
bool first = true;
public:
class Endl
{};
Endl endl;
CSVStream(std::ostream* o) : out(o) {}
void preprint()
{
if (!first)
{
*out << ", ";
}
else
{
first = false;
}
}
CSVStream& operator<<(const std::string& str)
{
preprint();
*out << str;
return *this;
}
CSVStream& operator<<(uint64_t u)
{
preprint();
*out << u;
return *this;
}
CSVStream& operator<<(Endl)
{
*out << std::endl;
first = true;
return *this;
}
};
} // namespace snmalloc
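CSVStream, above, inserts a comma before every field except the first on a row and resets that state when endl is streamed. A minimal usage sketch (illustrative only; assumes the deleted src/ directory is on the include path):

#include "ds/csv.h"
#include <cstdint>
#include <iostream>

// Prints two comma-separated rows to stdout: a header row and a data row.
inline void csv_demo()
{
  snmalloc::CSVStream csv(&std::cout);
  csv << "Name" << "Count" << csv.endl;
  csv << "example" << std::uint64_t{42} << csv.endl;
}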

172
3rdparty/snmalloc/src/ds/dllist.h (vendored)

@@ -1,172 +0,0 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <type_traits>
namespace snmalloc
{
/**
* Invalid pointer class. This is similar to `std::nullptr_t`, but allows
* other values.
*/
template<address_t Sentinel>
struct InvalidPointer
{
/**
* Equality comparison. Two invalid pointer values with the same sentinel
* are always the same, invalid pointer values with different sentinels are
* always different.
*/
template<uintptr_t OtherSentinel>
constexpr bool operator==(const InvalidPointer<OtherSentinel>&)
{
return Sentinel == OtherSentinel;
}
/**
* Equality comparison. Two invalid pointer values with the same sentinel
* are always the same, invalid pointer values with different sentinels are
* always different.
*/
template<uintptr_t OtherSentinel>
constexpr bool operator!=(const InvalidPointer<OtherSentinel>&)
{
return Sentinel != OtherSentinel;
}
/**
* Implicit conversion, creates a pointer with the value of the sentinel.
* On CHERI and other provenance-tracking systems, this is a
* provenance-free integer and so will trap if dereferenced, on other
* systems the sentinel should be a value in unmapped memory.
*/
template<typename T>
operator T*() const
{
return reinterpret_cast<T*>(Sentinel);
}
/**
* Implicit conversion to an address, returns the sentinel value.
*/
operator address_t() const
{
return Sentinel;
}
};
template<class T, class Terminator = std::nullptr_t>
class DLList
{
private:
static_assert(
std::is_same<decltype(T::prev), T*>::value, "T->prev must be a T*");
static_assert(
std::is_same<decltype(T::next), T*>::value, "T->next must be a T*");
T* head = Terminator();
public:
bool is_empty()
{
return head == Terminator();
}
T* get_head()
{
return head;
}
T* pop()
{
T* item = head;
if (item != Terminator())
remove(item);
return item;
}
void insert(T* item)
{
#ifndef NDEBUG
debug_check_not_contains(item);
#endif
item->next = head;
item->prev = Terminator();
if (head != Terminator())
head->prev = item;
head = item;
#ifndef NDEBUG
debug_check();
#endif
}
void remove(T* item)
{
#ifndef NDEBUG
debug_check_contains(item);
#endif
if (item->next != Terminator())
item->next->prev = item->prev;
if (item->prev != Terminator())
item->prev->next = item->next;
else
head = item->next;
#ifndef NDEBUG
debug_check();
#endif
}
void debug_check_contains(T* item)
{
#ifndef NDEBUG
debug_check();
T* curr = head;
while (curr != item)
{
assert(curr != Terminator());
curr = curr->next;
}
#else
UNUSED(item);
#endif
}
void debug_check_not_contains(T* item)
{
#ifndef NDEBUG
debug_check();
T* curr = head;
while (curr != Terminator())
{
assert(curr != item);
curr = curr->next;
}
#else
UNUSED(item);
#endif
}
void debug_check()
{
#ifndef NDEBUG
T* item = head;
T* prev = Terminator();
while (item != Terminator())
{
assert(item->prev == prev);
prev = item;
item = item->next;
}
#endif
}
};
} // namespace snmalloc
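DLList statically requires the element type to carry prev/next pointers of its own type, and newly inserted items become the head. A small round trip (illustrative only; bits.h is included first because dllist.h relies on address_t and the UNUSED macro defined there, and the deleted src/ directory is assumed to be on the include path):

#include "ds/bits.h"
#include "ds/dllist.h"
#include <cassert>

struct ListNode
{
  ListNode* prev;
  ListNode* next;
  int value;
};

inline void dllist_demo()
{
  snmalloc::DLList<ListNode> list;
  ListNode a{nullptr, nullptr, 1};
  ListNode b{nullptr, nullptr, 2};

  list.insert(&a);
  list.insert(&b); // the most recent insert becomes the head
  assert(list.get_head() == &b);

  assert(list.pop() == &b); // pop removes and returns the head
  list.remove(&a);
  assert(list.is_empty());
}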

24
3rdparty/snmalloc/src/ds/flaglock.h (vendored)

@@ -1,24 +0,0 @@
#pragma once
#include "bits.h"
namespace snmalloc
{
class FlagLock
{
private:
std::atomic_flag& lock;
public:
FlagLock(std::atomic_flag& lock) : lock(lock)
{
while (lock.test_and_set(std::memory_order_acquire))
bits::pause();
}
~FlagLock()
{
lock.clear(std::memory_order_release);
}
};
} // namespace snmalloc
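FlagLock is a scoped spin lock over a std::atomic_flag: the constructor spins with bits::pause() until it acquires the flag and the destructor releases it. A guarded increment, for illustration (assumes the deleted src/ directory is on the include path):

#include "ds/flaglock.h"

static std::atomic_flag counter_lock = ATOMIC_FLAG_INIT;
static int counter = 0;

inline void increment_counter()
{
  // The critical section lasts for the lifetime of the guard.
  snmalloc::FlagLock guard(counter_lock);
  counter++;
}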

90
3rdparty/snmalloc/src/ds/helpers.h (vendored)

@@ -1,90 +0,0 @@
#pragma once
#include "bits.h"
#include "flaglock.h"
namespace snmalloc
{
/*
* In some use cases we need to run before any of the C++ runtime has been
* initialised. This singleton class is designed to not depend on the runtime.
*/
template<class Object, Object init() noexcept>
class Singleton
{
public:
/**
* If argument is non-null, then it is assigned the value
* true, if this is the first call to get.
* At most one call will be first.
*/
inline static Object& get(bool* first = nullptr)
{
static std::atomic_flag flag;
static std::atomic<bool> initialised;
static Object obj;
// If defined should be initially false;
assert(first == nullptr || *first == false);
if (!initialised.load(std::memory_order_acquire))
{
FlagLock lock(flag);
if (!initialised)
{
obj = init();
initialised.store(true, std::memory_order_release);
if (first != nullptr)
*first = true;
}
}
return obj;
}
};
/**
* Wrapper for wrapping values.
*
* Wraps on read. This allows code to trust the value is in range, even when
* there is a memory corruption.
**/
template<size_t length, typename T>
class Mod
{
static_assert(
length == bits::next_pow2_const(length), "Must be a power of two.");
private:
T value;
public:
operator T()
{
return static_cast<T>(value & (length - 1));
}
Mod& operator=(const T v)
{
value = v;
return *this;
}
};
template<size_t length, typename T>
class ModArray
{
static constexpr size_t rlength = bits::next_pow2_const(length);
T array[rlength];
public:
constexpr const T& operator[](const size_t i) const
{
return array[i & (rlength - 1)];
}
constexpr T& operator[](const size_t i)
{
return array[i & (rlength - 1)];
}
};
} // namespace snmalloc
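Singleton runs its init function exactly once under a FlagLock, and Mod/ModArray mask indices into a power-of-two range on every access. A small sketch (illustrative only; Config and make_config are hypothetical names, and the deleted src/ directory is assumed to be on the include path):

#include "ds/helpers.h"
#include <cassert>

struct Config
{
  int verbosity;
};

// The init function must be noexcept and return the object by value.
inline Config make_config() noexcept
{
  return Config{1};
}

inline void helpers_demo()
{
  // Exactly one caller observes first == true.
  bool first = false;
  Config& cfg = snmalloc::Singleton<Config, make_config>::get(&first);
  assert(cfg.verbosity == 1);

  // Mod wraps on read: 9 stored in a length-8 Mod reads back as 1.
  snmalloc::Mod<8, size_t> idx;
  idx = 9;
  assert(idx == 1);

  // ModArray wraps its index the same way.
  snmalloc::ModArray<8, int> table{};
  table[9] = 42;
  assert(table[1] == 42);
}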

75
3rdparty/snmalloc/src/ds/mpmcstack.h (vendored)

@@ -1,75 +0,0 @@
#pragma once
#include "aba.h"
namespace snmalloc
{
template<class T, Construction c = RequiresInit>
class MPMCStack
{
using ABAT = ABA<T, c>;
private:
static_assert(
std::is_same<decltype(T::next), std::atomic<T*>>::value,
"T->next must be a std::atomic<T*>");
ABAT stack;
public:
void push(T* item)
{
return push(item, item);
}
void push(T* first, T* last)
{
// Pushes an item on the stack.
auto cmp = stack.read();
do
{
T* top = ABAT::load(cmp);
last->next.store(top, std::memory_order_release);
} while (!stack.compare_exchange(cmp, first));
}
T* pop()
{
// Returns the next item. If the returned value is decommitted, it is
// possible for the read of top->next to segfault.
auto cmp = stack.read();
T* top;
T* next;
do
{
top = ABAT::load(cmp);
if (top == nullptr)
break;
next = top->next.load(std::memory_order_acquire);
} while (!stack.compare_exchange(cmp, next));
return top;
}
T* pop_all()
{
// Returns all items as a linked list, leaving an empty stack.
auto cmp = stack.read();
T* top;
do
{
top = ABAT::load(cmp);
if (top == nullptr)
break;
} while (!stack.compare_exchange(cmp, nullptr));
return top;
}
};
} // namespace snmalloc
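MPMCStack layers the ABA wrapper into a lock-free LIFO whose element type must expose a std::atomic next pointer. A push/pop/pop_all round trip (illustrative only; single-threaded here just to show the API, with the deleted src/ directory assumed to be on the include path):

#include "ds/mpmcstack.h"
#include <cassert>

struct StackNode
{
  std::atomic<StackNode*> next;
  int value;
};

inline void mpmcstack_demo()
{
  snmalloc::MPMCStack<StackNode> stack;
  StackNode a{{}, 1};
  StackNode b{{}, 2};

  stack.push(&a);
  stack.push(&b);

  assert(stack.pop() == &b); // LIFO: the most recent push comes back first
  assert(stack.pop_all() == &a); // drains the rest as a linked list
}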

82
3rdparty/snmalloc/src/ds/mpscq.h (vendored)

@@ -1,82 +0,0 @@
#pragma once
#include "bits.h"
#include "helpers.h"
namespace snmalloc
{
template<class T>
class MPSCQ
{
private:
static_assert(
std::is_same<decltype(T::next), std::atomic<T*>>::value,
"T->next must be a std::atomic<T*>");
std::atomic<T*> back;
T* front;
public:
void invariant()
{
#ifndef NDEBUG
assert(back != nullptr);
assert(front != nullptr);
#endif
}
void init(T* stub)
{
stub->next.store(nullptr, std::memory_order_relaxed);
front = stub;
back.store(stub, std::memory_order_relaxed);
invariant();
}
T* destroy()
{
T* fnt = front;
back.store(nullptr, std::memory_order_relaxed);
front = nullptr;
return fnt;
}
inline bool is_empty()
{
T* bk = back.load(std::memory_order_relaxed);
return bk == front;
}
void enqueue(T* first, T* last)
{
// Pushes a list of messages to the queue. Each message from first to
// last should be linked together through their next pointers.
invariant();
last->next.store(nullptr, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);
T* prev = back.exchange(last, std::memory_order_relaxed);
prev->next.store(first, std::memory_order_relaxed);
}
T* dequeue()
{
// Returns the front message, or null if not possible to return a message.
invariant();
T* first = front;
T* next = first->next.load(std::memory_order_relaxed);
if (next != nullptr)
{
front = next;
assert(front);
std::atomic_thread_fence(std::memory_order_acquire);
invariant();
return first;
}
return nullptr;
}
};
} // namespace snmalloc
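MPSCQ is a stub-based multi-producer single-consumer queue: it always holds one stub node, and dequeue hands back the previous front while the most recently enqueued node stays behind as the new front. A single-threaded sketch of that behaviour (illustrative only; Message is a hypothetical type, and the deleted src/ directory is assumed to be on the include path):

#include "ds/mpscq.h"
#include <cassert>

struct Message
{
  std::atomic<Message*> next;
  int payload;
};

inline void mpscq_demo()
{
  static Message stub{{}, 0};
  static Message msg{{}, 42};

  snmalloc::MPSCQ<Message> queue;
  queue.init(&stub);
  assert(queue.is_empty());

  queue.enqueue(&msg, &msg); // a one-element batch: first == last
  assert(!queue.is_empty());

  // The node returned is the old front (the stub); msg becomes the new
  // front/stub, so the queue reads as empty again.
  assert(queue.dequeue() == &stub);
  assert(queue.is_empty());
}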

1253
3rdparty/snmalloc/src/mem/alloc.h (vendored)

File diff suppressed because it is too large

146
3rdparty/snmalloc/src/mem/allocconfig.h (vendored)

@@ -1,146 +0,0 @@
#pragma once
#include "../ds/bits.h"
namespace snmalloc
{
// 0 intermediate bits results in power of 2 small allocs. 1 intermediate
// bit gives additional sizeclasses at the midpoint between each power of 2.
// 2 intermediate bits gives 3 intermediate sizeclasses, etc.
static constexpr size_t INTERMEDIATE_BITS =
#ifdef USE_INTERMEDIATE_BITS
USE_INTERMEDIATE_BITS
#else
2
#endif
;
// Return remote small allocs when the local cache reaches this size.
static constexpr size_t REMOTE_CACHE =
#ifdef USE_REMOTE_CACHE
USE_REMOTE_CACHE
#else
1 << 20
#endif
;
// Handle at most this many object from the remote dealloc queue at a time.
static constexpr size_t REMOTE_BATCH =
#ifdef USE_REMOTE_BATCH
REMOTE_BATCH
#else
64
#endif
;
// Specifies smaller slab and super slab sizes for address space
// constrained scenarios.
static constexpr size_t ADDRESS_SPACE_CONSTRAINED =
#ifdef IS_ADDRESS_SPACE_CONSTRAINED
true
#else
// In 32 bit uses smaller superslab.
(!bits::is64())
#endif
;
static constexpr size_t RESERVE_MULTIPLE =
#ifdef USE_RESERVE_MULTIPLE
USE_RESERVE_MULTIPLE
#else
bits::is64() ? 16 : 2
#endif
;
enum DecommitStrategy
{
/**
* Never decommit memory.
*/
DecommitNone,
/**
* Decommit superslabs when they are entirely empty.
*/
DecommitSuper,
/**
* Decommit all slabs once they are empty.
*/
DecommitAll,
/**
* Decommit superslabs only when we are informed of memory pressure by the
* OS, do not decommit anything in normal operation.
*/
DecommitSuperLazy
};
static constexpr DecommitStrategy decommit_strategy =
#ifdef USE_DECOMMIT_STRATEGY
USE_DECOMMIT_STRATEGY
#elif defined(_WIN32) && !defined(OPEN_ENCLAVE)
DecommitSuperLazy
#else
DecommitSuper
#endif
;
// The remaining values are derived, not configurable.
static constexpr size_t POINTER_BITS =
bits::next_pow2_bits_const(sizeof(uintptr_t));
// Used to isolate values on cache lines to prevent false sharing.
static constexpr size_t CACHELINE_SIZE = 64;
// Used to keep Superslab metadata committed.
static constexpr size_t OS_PAGE_SIZE = 0x1000;
static constexpr size_t PAGE_ALIGNED_SIZE = OS_PAGE_SIZE << INTERMEDIATE_BITS;
// Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h)
// define `PAGE_SIZE` as a macro. We don't use `PAGE_SIZE` as our variable
// name, to avoid conflicts, but if we do see a macro definition then check
// that our value matches the platform's expected value.
#ifdef PAGE_SIZE
static_assert(
PAGE_SIZE == OS_PAGE_SIZE,
"Page size from system header does not match snmalloc config page size.");
#endif
// Minimum allocation size is space for two pointers.
static constexpr size_t MIN_ALLOC_BITS = bits::is64() ? 4 : 3;
static constexpr size_t MIN_ALLOC_SIZE = 1 << MIN_ALLOC_BITS;
// Slabs are 64 kb.
static constexpr size_t SLAB_BITS = ADDRESS_SPACE_CONSTRAINED ? 14 : 16;
static constexpr size_t SLAB_SIZE = 1 << SLAB_BITS;
static constexpr size_t SLAB_MASK = ~(SLAB_SIZE - 1);
// Superslabs are composed of this many slabs. Slab offsets are encoded as
// a byte, so the maximum count is 256. This must be a power of two to
// allow fast masking to find a superslab start address.
static constexpr size_t SLAB_COUNT_BITS = ADDRESS_SPACE_CONSTRAINED ? 6 : 8;
static constexpr size_t SLAB_COUNT = 1 << SLAB_COUNT_BITS;
static constexpr size_t SUPERSLAB_SIZE = SLAB_SIZE * SLAB_COUNT;
static constexpr size_t SUPERSLAB_MASK = ~(SUPERSLAB_SIZE - 1);
static constexpr size_t SUPERSLAB_BITS = SLAB_BITS + SLAB_COUNT_BITS;
static constexpr size_t RESERVE_SIZE = SUPERSLAB_SIZE * RESERVE_MULTIPLE;
static_assert((1ULL << SUPERSLAB_BITS) == SUPERSLAB_SIZE, "Sanity check");
// Number of slots for remote deallocation.
static constexpr size_t REMOTE_SLOT_BITS = 6;
static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS;
static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1;
static_assert(
INTERMEDIATE_BITS < MIN_ALLOC_BITS,
"INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS");
static_assert(
MIN_ALLOC_SIZE >= (sizeof(void*) * 2),
"MIN_ALLOC_SIZE must be sufficient for two pointers");
static_assert(
SLAB_BITS <= (sizeof(uint16_t) * 8),
"SLAB_BITS must not be more than the bits in a uint16_t");
static_assert(
SLAB_COUNT == bits::next_pow2_const(SLAB_COUNT),
"SLAB_COUNT must be a power of 2");
static_assert(
SLAB_COUNT <= (UINT8_MAX + 1), "SLAB_COUNT must fit in a uint8_t");
} // namespace snmalloc
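The derived constants above fix the layout for the default 64-bit, non-address-space-constrained build: 64 KiB slabs, 256 slabs per superslab, 16 MiB superslabs. A few compile-time spot checks under those assumptions (illustrative only; the deleted src/ directory is assumed to be on the include path):

#include "mem/allocconfig.h"

static_assert(snmalloc::SLAB_SIZE == (1 << 16), "slabs are 64 KiB by default");
static_assert(snmalloc::SLAB_COUNT == 256, "256 slabs per superslab by default");
static_assert(
  snmalloc::SUPERSLAB_SIZE == snmalloc::SLAB_SIZE * snmalloc::SLAB_COUNT,
  "superslabs are 16 MiB by default");
static_assert(
  snmalloc::RESERVE_SIZE ==
    snmalloc::SUPERSLAB_SIZE * snmalloc::RESERVE_MULTIPLE,
  "address space is reserved in multiples of the superslab size");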

19
3rdparty/snmalloc/src/mem/allocslab.h (vendored)

@@ -1,19 +0,0 @@
#pragma once
#include "../mem/baseslab.h"
#include "remoteallocator.h"
namespace snmalloc
{
class Allocslab : public Baseslab
{
protected:
RemoteAllocator* allocator;
public:
RemoteAllocator* get_allocator()
{
return allocator;
}
};
} // namespace snmalloc

397
3rdparty/snmalloc/src/mem/allocstats.h (vendored)

@@ -1,397 +0,0 @@
#pragma once
#include "../ds/bits.h"
#include <cstdint>
#ifdef USE_SNMALLOC_STATS
# include "../ds/csv.h"
# include "sizeclass.h"
# include <cstring>
# include <iostream>
#endif
namespace snmalloc
{
template<size_t N, size_t LARGE_N>
struct AllocStats
{
struct CurrentMaxPair
{
size_t current = 0;
size_t max = 0;
size_t used = 0;
void inc()
{
current++;
used++;
if (current > max)
max++;
}
void dec()
{
current--;
}
bool is_empty()
{
return current == 0;
}
bool is_unused()
{
return max == 0;
}
void add(CurrentMaxPair& that)
{
current += that.current;
max += that.max;
used += that.used;
}
#ifdef USE_SNMALLOC_STATS
void print(CSVStream& csv, size_t multiplier = 1)
{
csv << current * multiplier << max * multiplier << used * multiplier;
}
#endif
};
struct Stats
{
CurrentMaxPair count;
CurrentMaxPair slab_count;
uint64_t time = bits::tick();
uint64_t ticks = 0;
double online_average = 0;
bool is_empty()
{
return count.is_empty();
}
void add(Stats& that)
{
count.add(that.count);
slab_count.add(that.slab_count);
}
void addToRunningAverage()
{
uint64_t now = bits::tick();
if (slab_count.current != 0)
{
double occupancy = static_cast<double>(count.current) /
static_cast<double>(slab_count.current);
uint64_t duration = now - time;
if (ticks == 0)
online_average = occupancy;
else
online_average += ((occupancy - online_average) * duration) / ticks;
ticks += duration;
}
time = now;
}
#ifdef USE_SNMALLOC_STATS
void
print(CSVStream& csv, size_t multiplier = 1, size_t slab_multiplier = 1)
{
// Keep in sync with header lower down
count.print(csv, multiplier);
slab_count.print(csv, slab_multiplier);
size_t average = static_cast<size_t>(online_average * multiplier);
csv << average << (slab_multiplier - average) * slab_count.max
<< csv.endl;
}
#endif
};
#ifdef USE_SNMALLOC_STATS
static constexpr size_t BUCKETS_BITS = 4;
static constexpr size_t BUCKETS = 1 << BUCKETS_BITS;
static constexpr size_t TOTAL_BUCKETS =
bits::to_exp_mant_const<BUCKETS_BITS>(
bits::one_at_bit(bits::ADDRESS_BITS - 1));
Stats sizeclass[N];
Stats large[LARGE_N];
size_t remote_freed = 0;
size_t remote_posted = 0;
size_t remote_received = 0;
size_t superslab_push_count = 0;
size_t superslab_pop_count = 0;
size_t superslab_fresh_count = 0;
size_t segment_count = 0;
size_t bucketed_requests[TOTAL_BUCKETS] = {};
#endif
void alloc_request(size_t size)
{
UNUSED(size);
#ifdef USE_SNMALLOC_STATS
bucketed_requests[bits::to_exp_mant<BUCKETS_BITS>(size)]++;
#endif
}
bool is_empty()
{
#ifdef USE_SNMALLOC_STATS
for (size_t i = 0; i < N; i++)
{
if (!sizeclass[i].is_empty())
return false;
}
for (size_t i = 0; i < LARGE_N; i++)
{
if (!large[i].is_empty())
return false;
}
return (remote_freed == remote_posted);
#else
return true;
#endif
}
void sizeclass_alloc(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
sizeclass[sc].addToRunningAverage();
sizeclass[sc].count.inc();
#endif
}
void sizeclass_dealloc(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
sizeclass[sc].addToRunningAverage();
sizeclass[sc].count.dec();
#endif
}
void large_alloc(size_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
large[sc].count.inc();
#endif
}
void sizeclass_alloc_slab(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
sizeclass[sc].addToRunningAverage();
sizeclass[sc].slab_count.inc();
#endif
}
void sizeclass_dealloc_slab(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
sizeclass[sc].addToRunningAverage();
sizeclass[sc].slab_count.dec();
#endif
}
void large_dealloc(size_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
large[sc].count.dec();
#endif
}
void segment_create()
{
#ifdef USE_SNMALLOC_STATS
segment_count++;
#endif
}
void superslab_pop()
{
#ifdef USE_SNMALLOC_STATS
superslab_pop_count++;
#endif
}
void superslab_push()
{
#ifdef USE_SNMALLOC_STATS
superslab_push_count++;
#endif
}
void superslab_fresh()
{
#ifdef USE_SNMALLOC_STATS
superslab_fresh_count++;
#endif
}
void remote_free(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
remote_freed += sizeclass_to_size(sc);
#endif
}
void remote_post()
{
#ifdef USE_SNMALLOC_STATS
remote_posted = remote_freed;
#endif
}
void remote_receive(uint8_t sc)
{
UNUSED(sc);
#ifdef USE_SNMALLOC_STATS
remote_received += sizeclass_to_size(sc);
#endif
}
void add(AllocStats<N, LARGE_N>& that)
{
UNUSED(that);
#ifdef USE_SNMALLOC_STATS
for (size_t i = 0; i < N; i++)
sizeclass[i].add(that.sizeclass[i]);
for (size_t i = 0; i < LARGE_N; i++)
large[i].add(that.large[i]);
for (size_t i = 0; i < TOTAL_BUCKETS; i++)
bucketed_requests[i] += that.bucketed_requests[i];
remote_freed += that.remote_freed;
remote_posted += that.remote_posted;
remote_received += that.remote_received;
superslab_pop_count += that.superslab_pop_count;
superslab_push_count += that.superslab_push_count;
superslab_fresh_count += that.superslab_fresh_count;
segment_count += that.segment_count;
#endif
}
#ifdef USE_SNMALLOC_STATS
template<class Alloc>
void print(std::ostream& o, uint64_t dumpid = 0, uint64_t allocatorid = 0)
{
UNUSED(o);
UNUSED(dumpid);
UNUSED(allocatorid);
CSVStream csv(&o);
if (dumpid == 0)
{
// Output headers for initial dump
// Keep in sync with data dump
csv << "GlobalStats"
<< "DumpID"
<< "AllocatorID"
<< "Remote freed"
<< "Remote posted"
<< "Remote received"
<< "Superslab pop"
<< "Superslab push"
<< "Superslab fresh"
<< "Segments" << csv.endl;
csv << "BucketedStats"
<< "DumpID"
<< "AllocatorID"
<< "Size group"
<< "Size"
<< "Current count"
<< "Max count"
<< "Total Allocs"
<< "Current Slab bytes"
<< "Max Slab bytes"
<< "Total slab allocs"
<< "Average Slab Usage"
<< "Average wasted space" << csv.endl;
csv << "AllocSizes"
<< "DumpID"
<< "AllocatorID"
<< "ClassID"
<< "Low size"
<< "High size"
<< "Count" << csv.endl;
}
for (uint8_t i = 0; i < N; i++)
{
if (sizeclass[i].count.is_unused())
continue;
sizeclass[i].addToRunningAverage();
csv << "BucketedStats" << dumpid << allocatorid << i
<< sizeclass_to_size(i);
sizeclass[i].print(csv, sizeclass_to_size(i));
}
for (uint8_t i = 0; i < LARGE_N; i++)
{
if (large[i].count.is_unused())
continue;
csv << "BucketedStats" << dumpid << allocatorid << (i + N)
<< large_sizeclass_to_size(i);
large[i].print(csv, large_sizeclass_to_size(i));
}
size_t low = 0;
size_t high = 0;
for (size_t i = 0; i < TOTAL_BUCKETS; i++)
{
low = high + 1;
high = bits::from_exp_mant<BUCKETS_BITS>(i);
if (bucketed_requests[i] == 0)
continue;
csv << "AllocSizes" << dumpid << allocatorid << i << low << high
<< bucketed_requests[i] << csv.endl;
}
csv << "GlobalStats" << dumpid << allocatorid << remote_freed
<< remote_posted << remote_received << superslab_pop_count
<< superslab_push_count << superslab_fresh_count << segment_count
<< csv.endl;
}
#endif
};
} // namespace snmalloc

32
3rdparty/snmalloc/src/mem/baseslab.h (vendored)

@@ -1,32 +0,0 @@
#pragma once
#include "../ds/mpmcstack.h"
#include "allocconfig.h"
namespace snmalloc
{
enum SlabKind
{
Fresh = 0,
Large,
Medium,
Super,
/**
* If the decommit policy is lazy, slabs are moved to this state when all
* pages other than the first one have been decommitted.
*/
Decommitted
};
class Baseslab
{
protected:
SlabKind kind;
public:
SlabKind get_kind()
{
return kind;
}
};
} // namespace snmalloc

179
3rdparty/snmalloc/src/mem/globalalloc.h (vendored)

@@ -1,179 +0,0 @@
#pragma once
#include "../ds/helpers.h"
#include "alloc.h"
#include "pool.h"
namespace snmalloc
{
template<class MemoryProvider>
class AllocPool : Pool<Allocator<MemoryProvider>, MemoryProvider>
{
using Alloc = Allocator<MemoryProvider>;
using Parent = Pool<Allocator<MemoryProvider>, MemoryProvider>;
public:
static AllocPool* make(MemoryProvider& mp)
{
static_assert(
sizeof(AllocPool) == sizeof(Parent),
"You cannot add fields to this class.");
// This cast is safe due to the static assert.
return static_cast<AllocPool*>(Parent::make(mp));
}
static AllocPool* make() noexcept
{
return make(default_memory_provider);
}
Alloc* acquire()
{
return Parent::acquire(Parent::memory_provider);
}
void release(Alloc* a)
{
Parent::release(a);
}
public:
void aggregate_stats(Stats& stats)
{
auto* alloc = Parent::iterate();
while (alloc != nullptr)
{
stats.add(alloc->stats());
alloc = Parent::iterate(alloc);
}
}
#ifdef USE_SNMALLOC_STATS
void print_all_stats(std::ostream& o, uint64_t dumpid = 0)
{
auto alloc = Parent::iterate();
while (alloc != nullptr)
{
alloc->stats().template print<Alloc>(o, dumpid, alloc->id());
alloc = Parent::iterate(alloc);
}
}
#else
void print_all_stats(void*& o, uint64_t dumpid = 0)
{
UNUSED(o);
UNUSED(dumpid);
}
#endif
void cleanup_unused()
{
#ifndef USE_MALLOC
// Call this periodically to free and coalesce memory allocated by
// allocators that are not currently in use by any thread.
// One atomic operation to extract the stack, another to restore it.
// Handling the message queue for each stack is non-atomic.
auto* first = Parent::extract();
auto* alloc = first;
decltype(alloc) last;
if (alloc != nullptr)
{
while (alloc != nullptr)
{
alloc->handle_message_queue();
last = alloc;
alloc = Parent::extract(alloc);
}
restore(first, last);
}
#endif
}
void debug_check_empty()
{
#ifndef USE_MALLOC
// This is a debugging function. It checks that all memory from all
// allocators has been freed.
size_t alloc_count = 0;
auto* alloc = Parent::iterate();
// Count the linked allocators.
while (alloc != nullptr)
{
alloc = Parent::iterate(alloc);
alloc_count++;
}
bool done = false;
while (!done)
{
done = true;
alloc = Parent::iterate();
while (alloc != nullptr)
{
// Destroy the message queue so that it has no stub message.
Remote* p = alloc->message_queue().destroy();
while (p != nullptr)
{
Remote* next = p->non_atomic_next;
alloc->handle_dealloc_remote(p);
p = next;
}
// Place the static stub message on the queue.
alloc->init_message_queue();
// Post all remotes, including forwarded ones. If any allocator posts,
// repeat the loop.
if (alloc->remote.size > 0)
{
alloc->stats().remote_post();
alloc->remote.post(alloc->id());
done = false;
}
alloc = Parent::iterate(alloc);
}
}
alloc = Parent::iterate();
size_t empty_count = 0;
while (alloc != nullptr)
{
// Check that the allocator has freed all memory.
if (alloc->stats().is_empty())
empty_count++;
alloc = Parent::iterate(alloc);
}
if (alloc_count != empty_count)
error("Incorrect number of allocators");
#endif
}
};
inline AllocPool<GlobalVirtual>*& current_alloc_pool()
{
return Singleton<
AllocPool<GlobalVirtual>*,
AllocPool<GlobalVirtual>::make>::get();
}
template<class MemoryProvider>
inline AllocPool<MemoryProvider>* make_alloc_pool(MemoryProvider& mp)
{
return AllocPool<MemoryProvider>::make(mp);
}
using Alloc = Allocator<GlobalVirtual>;
} // namespace snmalloc
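AllocPool above hands out Allocator instances from a global pool and exposes the stats and leak-checking hooks shown in this file. A sketch of that pool API (illustrative only; it assumes the whole deleted tree, including the alloc.h whose diff is suppressed above, is on the include path):

#include "mem/globalalloc.h"

inline void alloc_pool_demo()
{
  auto* pool = snmalloc::current_alloc_pool();

  // Borrow an allocator, use it, and hand it back to the pool.
  snmalloc::Alloc* a = pool->acquire();
  // ... allocate and deallocate through `a` here ...
  pool->release(a);

  // Coalesce memory held by allocators not currently owned by any thread,
  // then run the debug check that every allocator has freed everything.
  pool->cleanup_unused();
  pool->debug_check_empty();

  // Aggregate per-allocator statistics into one Stats object.
  snmalloc::Stats stats;
  pool->aggregate_stats(stats);
}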

418
3rdparty/snmalloc/src/mem/largealloc.h (vendored)

@@ -1,418 +0,0 @@
#pragma once
#include "../ds/flaglock.h"
#include "../ds/helpers.h"
#include "../ds/mpmcstack.h"
#include "../pal/pal.h"
#include "allocstats.h"
#include "baseslab.h"
#include "sizeclass.h"
#include <new>
namespace snmalloc
{
template<class PAL>
class MemoryProviderStateMixin;
class Largeslab : public Baseslab
{
// This is the view of a contiguous memory area when it is being kept
// in the global size-classed caches of available contiguous memory areas.
private:
template<class a, Construction c>
friend class MPMCStack;
template<class PAL>
friend class MemoryProviderStateMixin;
std::atomic<Largeslab*> next;
public:
void init()
{
kind = Large;
}
};
/**
* A slab that has been decommitted. The first page remains committed and
* the only fields that are guaranteed to exist are the kind and next
* pointer from the superclass.
*/
struct Decommittedslab : public Largeslab
{
/**
* Constructor. Expected to be called via placement new into some memory
* that was formerly a superslab or large allocation and is now just some
* spare address space.
*/
Decommittedslab()
{
kind = Decommitted;
}
};
// This represents the state that the large allocator needs to add to the
// global state of the allocator. This is currently stored in the memory
// provider, so we add this in.
template<class PAL>
class MemoryProviderStateMixin : public PAL
{
std::atomic_flag lock = ATOMIC_FLAG_INIT;
address_t bump;
size_t remaining;
void new_block()
{
size_t size = SUPERSLAB_SIZE;
void* r = reserve<false>(&size, SUPERSLAB_SIZE);
if (size < SUPERSLAB_SIZE)
error("out of memory");
PAL::template notify_using<NoZero>(r, OS_PAGE_SIZE);
bump = address_cast(r);
remaining = size;
}
/**
* The last time we saw a low memory notification.
*/
std::atomic<uint64_t> last_low_memory_epoch = 0;
std::atomic_flag lazy_decommit_guard;
void lazy_decommit()
{
// If another thread is trying to do lazy decommit, let it continue. If
// we try to parallelise this, we'll most likely end up waiting on the
// same page table locks.
if (!lazy_decommit_guard.test_and_set())
{
return;
}
// When we hit low memory, iterate over size classes and decommit all of
// the memory that we can. Start with the small size classes so that we
// hit cached superslabs first.
// FIXME: We probably shouldn't do this all at once.
for (size_t large_class = 0; large_class < NUM_LARGE_CLASSES;
large_class++)
{
if (!PAL::expensive_low_memory_check())
{
break;
}
size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
size_t decommit_size = rsize - OS_PAGE_SIZE;
// Grab all of the chunks of this size class.
auto* slab = large_stack[large_class].pop_all();
while (slab)
{
// Decommit all except for the first page and then put it back on
// the stack.
if (slab->get_kind() != Decommitted)
{
PAL::notify_not_using(
pointer_offset(slab, OS_PAGE_SIZE), decommit_size);
}
// Once we've removed these from the stack, there will be no
// concurrent accesses and removal should have established a
// happens-before relationship, so it's safe to use relaxed loads
// here.
auto next = slab->next.load(std::memory_order_relaxed);
large_stack[large_class].push(new (slab) Decommittedslab());
slab = next;
}
}
lazy_decommit_guard.clear();
}
public:
/**
* Stack of large allocations that have been returned for reuse.
*/
ModArray<NUM_LARGE_CLASSES, MPMCStack<Largeslab, PreZeroed>> large_stack;
/**
* Primitive allocator for structure that are required before
* the allocator can be running.
*/
template<typename T, size_t alignment, typename... Args>
T* alloc_chunk(Args&&... args)
{
// Cache line align
size_t size = bits::align_up(sizeof(T), 64);
void* p;
{
FlagLock f(lock);
auto aligned_bump = bits::align_up(bump, alignment);
if ((aligned_bump - bump) > remaining)
{
new_block();
}
else
{
remaining -= aligned_bump - bump;
bump = aligned_bump;
}
if (remaining < size)
{
new_block();
}
p = pointer_cast<void>(bump);
bump += size;
remaining -= size;
}
auto page_start = bits::align_down(address_cast(p), OS_PAGE_SIZE);
auto page_end = bits::align_up(address_cast(p) + size, OS_PAGE_SIZE);
PAL::template notify_using<NoZero>(
pointer_cast<void>(page_start), page_end - page_start);
return new (p) T(std::forward<Args...>(args)...);
}
/**
* Query whether the PAL supports a specific feature.
*/
template<PalFeatures F>
constexpr static bool pal_supports()
{
return (PAL::pal_features & F) == F;
}
/**
* Returns the number of low memory notifications that have been received
* (over the lifetime of this process). If the underlying system does not
* support low memory notifications, this will return 0.
*/
ALWAYSINLINE
uint64_t low_memory_epoch()
{
if constexpr (pal_supports<LowMemoryNotification>())
{
return PAL::low_memory_epoch();
}
else
{
return 0;
}
}
template<bool committed>
void* reserve(size_t* size, size_t align) noexcept
{
if constexpr (pal_supports<AlignedAllocation>())
{
return PAL::template reserve<committed>(size, align);
}
else
{
size_t request = *size;
// Add align, so we can guarantee to provide at least size.
request += align;
// Alignment must be a power of 2.
assert(align == bits::next_pow2(align));
void* p = PAL::template reserve<committed>(&request);
*size = request;
auto p0 = address_cast(p);
auto start = bits::align_up(p0, align);
if (start > p0)
{
uintptr_t end = bits::align_down(p0 + request, align);
*size = end - start;
PAL::notify_not_using(p, start - p0);
PAL::notify_not_using(pointer_cast<void>(end), (p0 + request) - end);
p = pointer_cast<void>(start);
}
return p;
}
}
ALWAYSINLINE void lazy_decommit_if_needed()
{
#ifdef TEST_LAZY_DECOMMIT
static_assert(
TEST_LAZY_DECOMMIT > 0,
"TEST_LAZY_DECOMMIT must be a positive integer value.");
static std::atomic<uint64_t> counter;
auto c = counter++;
if (c % TEST_LAZY_DECOMMIT == 0)
{
lazy_decommit();
}
#else
if constexpr (decommit_strategy == DecommitSuperLazy)
{
auto new_epoch = low_memory_epoch();
auto old_epoch = last_low_memory_epoch.load(std::memory_order_acquire);
if (new_epoch > old_epoch)
{
// Try to update the epoch to the value that we've seen. If
// another thread has seen a newer epoch than us (or done the same
// update) let them win.
do
{
if (last_low_memory_epoch.compare_exchange_strong(
old_epoch, new_epoch))
{
lazy_decommit();
}
} while (old_epoch <= new_epoch);
}
}
#endif
}
};
using Stats = AllocStats<NUM_SIZECLASSES, NUM_LARGE_CLASSES>;
enum AllowReserve
{
NoReserve,
YesReserve
};
template<class MemoryProvider>
class LargeAlloc
{
void* reserved_start = nullptr;
void* reserved_end = nullptr;
public:
// This will be a zero-size structure if stats are not enabled.
Stats stats;
MemoryProvider& memory_provider;
LargeAlloc(MemoryProvider& mp) : memory_provider(mp) {}
template<AllowReserve allow_reserve>
bool reserve_memory(size_t need, size_t add)
{
if ((address_cast(reserved_start) + need) > address_cast(reserved_end))
{
if constexpr (allow_reserve == YesReserve)
{
stats.segment_create();
reserved_start =
memory_provider.template reserve<false>(&add, SUPERSLAB_SIZE);
reserved_end = pointer_offset(reserved_start, add);
reserved_start = pointer_cast<void>(
bits::align_up(address_cast(reserved_start), SUPERSLAB_SIZE));
if (add < need)
return false;
}
else
{
return false;
}
}
return true;
}
template<ZeroMem zero_mem = NoZero, AllowReserve allow_reserve = YesReserve>
void* alloc(size_t large_class, size_t size)
{
size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
if (size == 0)
size = rsize;
void* p = memory_provider.large_stack[large_class].pop();
memory_provider.lazy_decommit_if_needed();
if (p == nullptr)
{
assert(reserved_start <= reserved_end);
size_t add;
if ((rsize + SUPERSLAB_SIZE) < RESERVE_SIZE)
add = RESERVE_SIZE;
else
add = rsize + SUPERSLAB_SIZE;
if (!reserve_memory<allow_reserve>(rsize, add))
return nullptr;
p = reserved_start;
reserved_start = pointer_offset(p, rsize);
stats.superslab_fresh();
// All memory is zeroed since it comes from reserved space.
memory_provider.template notify_using<NoZero>(p, size);
}
else
{
stats.superslab_pop();
if constexpr (decommit_strategy == DecommitSuperLazy)
{
if (static_cast<Baseslab*>(p)->get_kind() == Decommitted)
{
// The first page is already in "use" for the stack element,
// this will need zeroing for a YesZero call.
if constexpr (zero_mem == YesZero)
memory_provider.template zero<true>(p, OS_PAGE_SIZE);
// Notify we are using the rest of the allocation.
// Passing zero_mem ensures the PAL provides zeroed pages if
// required.
memory_provider.template notify_using<zero_mem>(
pointer_offset(p, OS_PAGE_SIZE),
bits::align_up(size, OS_PAGE_SIZE) - OS_PAGE_SIZE);
}
else
{
if constexpr (zero_mem == YesZero)
memory_provider.template zero<true>(
p, bits::align_up(size, OS_PAGE_SIZE));
}
}
if ((decommit_strategy != DecommitNone) || (large_class > 0))
{
// The first page is already in "use" for the stack element,
// this will need zeroing for a YesZero call.
if constexpr (zero_mem == YesZero)
memory_provider.template zero<true>(p, OS_PAGE_SIZE);
// Notify we are using the rest of the allocation.
// Passing zero_mem ensures the PAL provides zeroed pages if required.
memory_provider.template notify_using<zero_mem>(
pointer_offset(p, OS_PAGE_SIZE),
bits::align_up(size, OS_PAGE_SIZE) - OS_PAGE_SIZE);
}
else
{
// This is a superslab that has not been decommitted.
if constexpr (zero_mem == YesZero)
memory_provider.template zero<true>(
p, bits::align_up(size, OS_PAGE_SIZE));
}
}
return p;
}
void dealloc(void* p, size_t large_class)
{
stats.superslab_push();
memory_provider.large_stack[large_class].push(static_cast<Largeslab*>(p));
memory_provider.lazy_decommit_if_needed();
}
};
using GlobalVirtual = MemoryProviderStateMixin<Pal>;
/**
* The memory provider that will be used if no other provider is explicitly
* passed as an argument.
*/
HEADER_GLOBAL GlobalVirtual default_memory_provider;
} // namespace snmalloc

132
3rdparty/snmalloc/src/mem/mediumslab.h (vendored)

@@ -1,132 +0,0 @@
#pragma once
#include "../ds/dllist.h"
#include "allocconfig.h"
#include "allocslab.h"
#include "sizeclass.h"
namespace snmalloc
{
class Mediumslab : public Allocslab
{
// This is the view of a 16 mb area when it is being used to allocate
// medium sized classes: 64 kb to 16 mb, non-inclusive.
private:
friend DLList<Mediumslab>;
// Keep the allocator pointer on a separate cache line. It is read by
// other threads, and does not change, so we avoid false sharing.
alignas(CACHELINE_SIZE) Mediumslab* next;
Mediumslab* prev;
uint16_t free;
uint8_t head;
uint8_t sizeclass;
uint16_t stack[SLAB_COUNT - 1];
public:
static constexpr uint32_t header_size()
{
static_assert(
sizeof(Mediumslab) < OS_PAGE_SIZE,
"Mediumslab header size must be less than the page size");
static_assert(
sizeof(Mediumslab) < SLAB_SIZE,
"Mediumslab header size must be less than the slab size");
// Always use a full page as the header, in order to get page sized
// alignment of individual allocations.
return OS_PAGE_SIZE;
}
static Mediumslab* get(void* p)
{
return pointer_cast<Mediumslab>(address_cast(p) & SUPERSLAB_MASK);
}
void init(RemoteAllocator* alloc, uint8_t sc, size_t rsize)
{
assert(sc >= NUM_SMALL_CLASSES);
assert((sc - NUM_SMALL_CLASSES) < NUM_MEDIUM_CLASSES);
allocator = alloc;
head = 0;
// If this was previously a Mediumslab of the same sizeclass, don't
// initialise the allocation stack.
if ((kind != Medium) || (sizeclass != sc))
{
sizeclass = sc;
uint16_t ssize = static_cast<uint16_t>(rsize >> 8);
kind = Medium;
free = medium_slab_free(sc);
for (uint16_t i = free; i > 0; i--)
stack[free - i] =
static_cast<uint16_t>((SUPERSLAB_SIZE >> 8) - (i * ssize));
}
else
{
assert(free == medium_slab_free(sc));
}
}
uint8_t get_sizeclass()
{
return sizeclass;
}
template<ZeroMem zero_mem, typename MemoryProvider>
void* alloc(size_t size, MemoryProvider& memory_provider)
{
assert(!full());
uint16_t index = stack[head++];
void* p = pointer_offset(this, (static_cast<size_t>(index) << 8));
free--;
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, OS_PAGE_SIZE));
size = bits::align_up(size, OS_PAGE_SIZE);
if constexpr (decommit_strategy == DecommitAll)
memory_provider.template notify_using<zero_mem>(p, size);
else if constexpr (zero_mem == YesZero)
memory_provider.template zero<true>(p, size);
return p;
}
template<typename MemoryProvider>
bool dealloc(void* p, MemoryProvider& memory_provider)
{
assert(head > 0);
// Returns true if the Mediumslab was full before this deallocation.
bool was_full = full();
free++;
stack[--head] = pointer_to_index(p);
if constexpr (decommit_strategy == DecommitAll)
memory_provider.notify_not_using(p, sizeclass_to_size(sizeclass));
return was_full;
}
bool full()
{
return free == 0;
}
bool empty()
{
return head == 0;
}
private:
uint16_t pointer_to_index(void* p)
{
// Get the offset from the slab for a memory location.
return static_cast<uint16_t>(
((address_cast(p) - address_cast(this))) >> 8);
}
};
} // namespace snmalloc
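For reference, a minimal standalone sketch of the stack initialisation above (not part of the original file; it assumes the usual 16 MiB superslab and a 4 KiB OS page, so Mediumslab::header_size() is 4096):
#include <cassert>
#include <cstdint>
#include <vector>
int main()
{
  constexpr uint32_t SUPERSLAB_SIZE = 1u << 24; // 16 MiB (assumed)
  constexpr uint32_t HEADER = 4096;             // header_size() == OS_PAGE_SIZE (assumed)
  uint32_t rsize = 1u << 16;                    // one medium sizeclass: 64 KiB
  uint16_t ssize = static_cast<uint16_t>(rsize >> 8);
  uint16_t slots = static_cast<uint16_t>((SUPERSLAB_SIZE - HEADER) / rsize);
  std::vector<uint16_t> stack;
  for (uint16_t i = slots; i > 0; i--)
    stack.push_back(static_cast<uint16_t>((SUPERSLAB_SIZE >> 8) - (i * ssize)));
  // The first slot popped is the lowest usable offset in the superslab;
  // shifting the 16-bit index left by 8 recovers the byte offset.
  assert((static_cast<uint32_t>(stack.front()) << 8) == SUPERSLAB_SIZE - slots * rsize);
  return 0;
}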

3rdparty/snmalloc/src/mem/metaslab.h

@ -1,169 +0,0 @@
#pragma once
#include "../ds/dllist.h"
#include "../ds/helpers.h"
#include "sizeclass.h"
namespace snmalloc
{
class Slab;
struct SlabLink
{
SlabLink* prev;
SlabLink* next;
Slab* get_slab()
{
return pointer_cast<Slab>(address_cast(this) & SLAB_MASK);
}
};
using SlabList = DLList<SlabLink>;
static_assert(
sizeof(SlabLink) <= MIN_ALLOC_SIZE,
"Need to be able to pack a SlabLink into any free small alloc");
// The Metaslab represents the status of a single slab.
// This can be either a short or a standard slab.
class Metaslab
{
private:
// How many entries are used in this slab.
uint16_t used = 0;
public:
// Bump free list of unused entries in this sizeclass.
// If the bottom bit is 1, then this represents a bump_ptr
// of where we have allocated up to in this slab. Otherwise,
// it represents the location of the first block in the free
// list. The free list is chained through deallocated blocks.
// It is terminated with a bump ptr.
//
// Note that the first entry in a slab is never bump allocated
// but is used for the link. This means that 1 represents the fully
// bump allocated slab.
Mod<SLAB_SIZE, uint16_t> head;
// When a slab has free space it will be on the "has space" list for
// that size class. We use an empty block in this slab as the
// doubly linked node in that size class's free list.
Mod<SLAB_SIZE, uint16_t> link;
uint8_t sizeclass;
// Initially zero to encode the superslab's relative list of slabs.
uint8_t next = 0;
void add_use()
{
used++;
}
void sub_use()
{
used--;
}
void set_unused()
{
used = 0;
}
bool is_unused()
{
return used == 0;
}
bool is_full()
{
return link == 1;
}
void set_full()
{
assert(head == 1);
assert(link != 1);
link = 1;
}
SlabLink* get_link(Slab* slab)
{
return reinterpret_cast<SlabLink*>(pointer_offset(slab, link));
}
bool valid_head(bool is_short)
{
size_t size = sizeclass_to_size(sizeclass);
size_t slab_start = get_initial_link(sizeclass, is_short);
size_t all_high_bits = ~static_cast<size_t>(1);
size_t head_start =
remove_cache_friendly_offset(head & all_high_bits, sizeclass);
return ((head_start - slab_start) % size) == 0;
}
void debug_slab_invariant(bool is_short, Slab* slab)
{
#if !defined(NDEBUG) && !defined(SNMALLOC_CHEAP_CHECKS)
size_t size = sizeclass_to_size(sizeclass);
size_t offset = get_initial_link(sizeclass, is_short);
size_t accounted_for = used * size + offset;
if (is_full())
{
// All the blocks must be used.
assert(SLAB_SIZE == accounted_for);
// There is no free list to validate; the
// 'link' value is not important if full.
return;
}
// Block is not full
assert(SLAB_SIZE > accounted_for);
// Walk the bump/free-list segment, accounting for unused space
uint16_t curr = head;
while ((curr & 1) != 1)
{
// Check we are looking at a correctly aligned block
uint16_t start = remove_cache_friendly_offset(curr, sizeclass);
assert((start - offset) % size == 0);
// Account for free elements in free list
accounted_for += size;
assert(SLAB_SIZE >= accounted_for);
// We should never reach the link node in the free list.
assert(curr != link);
// Iterate bump/free list segment
curr = *reinterpret_cast<uint16_t*>(pointer_offset(slab, curr));
}
if (curr != 1)
{
// Check we terminated traversal on a correctly aligned block
uint16_t start = remove_cache_friendly_offset(curr & ~1, sizeclass);
assert((start - offset) % size == 0);
// Account for the space that is yet to be bump allocated
accounted_for += SLAB_SIZE - (curr - 1);
// The link should be the first allocation as we
// haven't completely filled this block at any point.
assert(link == get_initial_link(sizeclass, is_short));
}
assert(!is_full());
// Add the link node.
accounted_for += size;
// All space accounted for
assert(SLAB_SIZE == accounted_for);
#else
UNUSED(slab);
UNUSED(is_short);
#endif
}
};
} // namespace snmalloc
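As a small standalone illustration of the head encoding described above (mine, not part of the file): a value with the bottom bit set is a bump pointer of offset + 1, an even value is the slab offset of the first free-list entry, and 1 therefore denotes a fully bump-allocated slab.
#include <cassert>
#include <cstdint>
bool is_bump_ptr(uint16_t head)
{
  return (head & 1) == 1;
}
int main()
{
  uint16_t bump = 4096 + 1; // bump allocation continues at offset 4096
  uint16_t freed = 2048;    // free list starts at the block at offset 2048
  assert(is_bump_ptr(bump) && !is_bump_ptr(freed));
  assert(is_bump_ptr(1));   // 1: the slab is fully bump allocated
  return 0;
}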

3rdparty/snmalloc/src/mem/pagemap.h

@ -1,360 +0,0 @@
#pragma once
#include "../ds/bits.h"
#include "../ds/helpers.h"
#include <atomic>
#include <utility>
namespace snmalloc
{
static constexpr size_t PAGEMAP_NODE_BITS = 16;
static constexpr size_t PAGEMAP_NODE_SIZE = 1ULL << PAGEMAP_NODE_BITS;
/**
* Structure describing the configuration of a pagemap. When querying a
* pagemap from a different instantiation of snmalloc, the pagemap is exposed
* as a `void*`. This structure allows the caller to check whether the
* pagemap is of the format that they expect.
*/
struct PagemapConfig
{
/**
* The version of the pagemap structure. This is always 1 in existing
* versions of snmalloc. This will be incremented every time the format
* changes in an incompatible way. Changes to the format may add fields to
* the end of this structure.
*/
uint32_t version;
/**
* Is this a flat pagemap? If this field is false, the pagemap is the
* hierarchical structure.
*/
bool is_flat_pagemap;
/**
* Number of bytes in a pointer.
*/
uint8_t sizeof_pointer;
/**
* The number of bits of the address used to index into the pagemap.
*/
uint64_t pagemap_bits;
/**
* The size (in bytes) of a pagemap entry.
*/
size_t size_of_entry;
};
template<size_t GRANULARITY_BITS, typename T, T default_content>
class Pagemap
{
private:
static constexpr size_t COVERED_BITS =
bits::ADDRESS_BITS - GRANULARITY_BITS;
static constexpr size_t CONTENT_BITS =
bits::next_pow2_bits_const(sizeof(T));
static_assert(
PAGEMAP_NODE_BITS - CONTENT_BITS < COVERED_BITS,
"Should use the FlatPageMap as it does not require a tree");
static constexpr size_t BITS_FOR_LEAF = PAGEMAP_NODE_BITS - CONTENT_BITS;
static constexpr size_t ENTRIES_PER_LEAF = 1 << BITS_FOR_LEAF;
static constexpr size_t LEAF_MASK = ENTRIES_PER_LEAF - 1;
static constexpr size_t BITS_PER_INDEX_LEVEL =
PAGEMAP_NODE_BITS - POINTER_BITS;
static constexpr size_t ENTRIES_PER_INDEX_LEVEL = 1 << BITS_PER_INDEX_LEVEL;
static constexpr size_t ENTRIES_MASK = ENTRIES_PER_INDEX_LEVEL - 1;
static constexpr size_t INDEX_BITS =
BITS_FOR_LEAF > COVERED_BITS ? 0 : COVERED_BITS - BITS_FOR_LEAF;
static constexpr size_t INDEX_LEVELS = INDEX_BITS / BITS_PER_INDEX_LEVEL;
static constexpr size_t TOPLEVEL_BITS =
INDEX_BITS - (INDEX_LEVELS * BITS_PER_INDEX_LEVEL);
static constexpr size_t TOPLEVEL_ENTRIES = 1 << TOPLEVEL_BITS;
static constexpr size_t TOPLEVEL_SHIFT =
(INDEX_LEVELS * BITS_PER_INDEX_LEVEL) + BITS_FOR_LEAF + GRANULARITY_BITS;
// Value used to represent when a node is being added to
static constexpr InvalidPointer<1> LOCKED_ENTRY{};
struct Leaf
{
std::atomic<T> values[ENTRIES_PER_LEAF];
};
struct PagemapEntry
{
std::atomic<PagemapEntry*> entries[ENTRIES_PER_INDEX_LEVEL];
};
static_assert(
sizeof(PagemapEntry) == sizeof(Leaf), "Should be the same size");
static_assert(
sizeof(PagemapEntry) == PAGEMAP_NODE_SIZE, "Should be the same size");
// Explicit initialisation is omitted, as this is only ever used as a global:
// cl generates a memset to zero for the initialiser, which would be a problem
// during libc/ucrt bring-up. On ucrt that memset would run after the first
// allocation.
// TODO: Relying on the absence of the memset is fragile, and we should
// review this to ensure we don't get bitten by it in the future.
std::atomic<PagemapEntry*> top[TOPLEVEL_ENTRIES]; // = {nullptr};
template<bool create_addr>
inline PagemapEntry* get_node(std::atomic<PagemapEntry*>* e, bool& result)
{
// The page map nodes are all allocated directly from the OS and zero-
// initialised by a system call. We don't need any memory ordering to
// guarantee that we see that correctly.
PagemapEntry* value = e->load(std::memory_order_relaxed);
if ((value == nullptr) || (value == LOCKED_ENTRY))
{
if constexpr (create_addr)
{
value = nullptr;
if (e->compare_exchange_strong(
value, LOCKED_ENTRY, std::memory_order_relaxed))
{
auto& v = default_memory_provider;
value = v.alloc_chunk<PagemapEntry, OS_PAGE_SIZE>();
e->store(value, std::memory_order_release);
}
else
{
while (address_cast(e->load(std::memory_order_relaxed)) ==
LOCKED_ENTRY)
{
bits::pause();
}
value = e->load(std::memory_order_acquire);
}
}
else
{
result = false;
return nullptr;
}
}
result = true;
return value;
}
template<bool create_addr>
inline std::pair<Leaf*, size_t> get_leaf_index(uintptr_t addr, bool& result)
{
#ifdef FreeBSD_KERNEL
// Zero the top 16 bits - kernel addresses all have them set, but the
// data structure assumes that they're zero.
addr &= 0xffffffffffffULL;
#endif
size_t ix = addr >> TOPLEVEL_SHIFT;
size_t shift = TOPLEVEL_SHIFT;
std::atomic<PagemapEntry*>* e = &top[ix];
for (size_t i = 0; i < INDEX_LEVELS; i++)
{
PagemapEntry* value = get_node<create_addr>(e, result);
if (!result)
return std::pair(nullptr, 0);
shift -= BITS_PER_INDEX_LEVEL;
ix = (addr >> shift) & ENTRIES_MASK;
e = &value->entries[ix];
if constexpr (INDEX_LEVELS == 1)
{
UNUSED(i);
break;
}
i++;
if (i == INDEX_LEVELS)
break;
}
Leaf* leaf = reinterpret_cast<Leaf*>(get_node<create_addr>(e, result));
if (!result)
return std::pair(nullptr, 0);
shift -= BITS_FOR_LEAF;
ix = (addr >> shift) & LEAF_MASK;
return std::pair(leaf, ix);
}
template<bool create_addr>
inline std::atomic<T>* get_addr(uintptr_t p, bool& success)
{
auto leaf_ix = get_leaf_index<create_addr>(p, success);
return &(leaf_ix.first->values[leaf_ix.second]);
}
std::atomic<T>* get_ptr(uintptr_t p)
{
bool success;
return get_addr<true>(p, success);
}
public:
/**
* The pagemap configuration describing this instantiation of the template.
*/
static constexpr PagemapConfig config = {
1, false, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)};
/**
* Cast a `void*` to a pointer to this template instantiation, given a
* config describing the configuration. Return null if the configuration
* passed does not correspond to this template instantiation.
*
* This is intended to allow code that depends on the pagemap having a
* specific representation to fail gracefully.
*/
static Pagemap* cast_to_pagemap(void* pm, const PagemapConfig* c)
{
if (
(c->version != 1) || (c->is_flat_pagemap) ||
(c->sizeof_pointer != sizeof(uintptr_t)) ||
(c->pagemap_bits != GRANULARITY_BITS) ||
(c->size_of_entry != sizeof(T)) || (!std::is_integral_v<T>))
{
return nullptr;
}
return static_cast<Pagemap*>(pm);
}
/**
* Returns the index of a pagemap entry within a given page. This is used
* in code that propagates changes to the pagemap elsewhere.
*/
size_t index_for_address(uintptr_t p)
{
bool success;
return (OS_PAGE_SIZE - 1) &
reinterpret_cast<size_t>(get_addr<true>(p, success));
}
/**
* Returns the address of the pagemap page containing the entry for `p`.
*/
void* page_for_address(uintptr_t p)
{
bool success;
return reinterpret_cast<void*>(
~(OS_PAGE_SIZE - 1) &
reinterpret_cast<uintptr_t>(get_addr<true>(p, success)));
}
T get(uintptr_t p)
{
bool success;
auto addr = get_addr<false>(p, success);
if (!success)
return default_content;
return addr->load(std::memory_order_relaxed);
}
void set(uintptr_t p, T x)
{
bool success;
auto addr = get_addr<true>(p, success);
addr->store(x, std::memory_order_relaxed);
}
void set_range(uintptr_t p, T x, size_t length)
{
bool success;
do
{
auto leaf_ix = get_leaf_index<true>(p, success);
size_t ix = leaf_ix.second;
auto last = bits::min(LEAF_MASK + 1, ix + length);
auto diff = last - ix;
for (; ix < last; ix++)
{
SNMALLOC_ASSUME(leaf_ix.first != nullptr);
leaf_ix.first->values[ix] = x;
}
length = length - diff;
p = p + (diff << GRANULARITY_BITS);
} while (length > 0);
}
};
/**
* Simple pagemap that stores a T for each 2^GRANULARITY_BITS byte range of
* the address space.
**/
template<size_t GRANULARITY_BITS, typename T>
class FlatPagemap
{
private:
static constexpr size_t COVERED_BITS =
bits::ADDRESS_BITS - GRANULARITY_BITS;
static constexpr size_t CONTENT_BITS =
bits::next_pow2_bits_const(sizeof(T));
static constexpr size_t ENTRIES = 1ULL << (COVERED_BITS + CONTENT_BITS);
static constexpr size_t SHIFT = GRANULARITY_BITS;
std::atomic<T> top[ENTRIES];
public:
/**
* The pagemap configuration describing this instantiation of the template.
*/
static constexpr PagemapConfig config = {
1, true, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)};
/**
* Cast a `void*` to a pointer to this template instantiation, given a
* config describing the configuration. Return null if the configuration
* passed does not correspond to this template instantiation.
*
* This is intended to allow code that depends on the pagemap having a
* specific representation to fail gracefully.
*/
static FlatPagemap* cast_to_pagemap(void* pm, const PagemapConfig* c)
{
if (
(c->version != 1) || (!c->is_flat_pagemap) ||
(c->sizeof_pointer != sizeof(uintptr_t)) ||
(c->pagemap_bits != GRANULARITY_BITS) ||
(c->size_of_entry != sizeof(T)) || (!std::is_integral_v<T>))
{
return nullptr;
}
return static_cast<FlatPagemap*>(pm);
}
T get(uintptr_t p)
{
return top[p >> SHIFT].load(std::memory_order_relaxed);
}
void set(uintptr_t p, T x)
{
top[p >> SHIFT].store(x, std::memory_order_relaxed);
}
void set_range(uintptr_t p, T x, size_t length)
{
size_t index = p >> SHIFT;
do
{
top[index].store(x, std::memory_order_relaxed);
index++;
length--;
} while (length > 0);
}
};
} // namespace snmalloc
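A toy standalone version of the flat variant (illustrative only; the constants are deliberately small and are not snmalloc's): one atomic entry per 2^GRANULARITY_BITS bytes of address space, indexed by a single shift.
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
constexpr size_t GRANULARITY_BITS = 24; // one entry per 16 MiB chunk
constexpr size_t ADDRESS_BITS = 36;     // kept small so the table stays tiny
static std::atomic<uint8_t> flat[size_t(1) << (ADDRESS_BITS - GRANULARITY_BITS)];
int main()
{
  uint64_t p = 0x342000000ULL; // some address below 2^36
  flat[p >> GRANULARITY_BITS].store(42, std::memory_order_relaxed);
  assert(flat[p >> GRANULARITY_BITS].load(std::memory_order_relaxed) == 42);
  return 0;
}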

3rdparty/snmalloc/src/mem/pool.h

@ -1,98 +0,0 @@
#pragma once
#include "../ds/flaglock.h"
#include "../ds/mpmcstack.h"
#include "pooled.h"
namespace snmalloc
{
/**
* Pool of a particular type of object.
*
* This pool will never return objects to the OS. It maintains a list of all
* objects ever allocated that can be iterated (not concurrency safe). Pooled
* types can be acquired from the pool, and released back to the pool. This is
* concurrency safe.
*
* This is used to bootstrap the allocation of allocators.
**/
template<class T, class MemoryProvider = GlobalVirtual>
class Pool
{
private:
friend Pooled<T>;
template<typename TT>
friend class MemoryProviderStateMixin;
std::atomic_flag lock = ATOMIC_FLAG_INIT;
MPMCStack<T, PreZeroed> stack;
T* list = nullptr;
Pool(MemoryProvider& m) : memory_provider(m) {}
public:
MemoryProvider& memory_provider;
static Pool* make(MemoryProvider& memory_provider) noexcept
{
return memory_provider.template alloc_chunk<Pool, 0, MemoryProvider&>(
memory_provider);
}
static Pool* make() noexcept
{
return make(default_memory_provider);
}
template<typename... Args>
T* acquire(Args&&... args)
{
T* p = stack.pop();
if (p != nullptr)
return p;
p = memory_provider
.template alloc_chunk<T, bits::next_pow2_const(sizeof(T))>(
std::forward<Args...>(args)...);
FlagLock f(lock);
p->list_next = list;
list = p;
return p;
}
void release(T* p)
{
// The object's destructor is not run. If the object is "reallocated", it
// is returned without the constructor being run, so the object is reused
// without re-initialisation.
stack.push(p);
}
T* extract(T* p = nullptr)
{
// Returns a linked list of all objects in the stack, emptying the stack.
if (p == nullptr)
return stack.pop_all();
return p->next;
}
void restore(T* first, T* last)
{
// Pushes a linked list of objects onto the stack. Use to put a linked
// list returned by extract back onto the stack.
stack.push(first, last);
}
T* iterate(T* p = nullptr)
{
if (p == nullptr)
return list;
return p->list_next;
}
};
} // namespace snmalloc

3rdparty/snmalloc/src/mem/pooled.h

@ -1,21 +0,0 @@
#pragma once
#include "../ds/bits.h"
namespace snmalloc
{
template<class T>
class Pooled
{
private:
template<class TT, class MemoryProvider>
friend class Pool;
template<class TT, Construction c>
friend class MPMCStack;
/// Used by the pool for chaining together entries when not in use.
std::atomic<T*> next = nullptr;
/// Used by the pool to keep the list of all entries ever created.
T* list_next;
};
} // namespace snmalloc

3rdparty/snmalloc/src/mem/remoteallocator.h

@ -1,50 +0,0 @@
#pragma once
#include "../ds/mpscq.h"
#include "../mem/allocconfig.h"
#include "../mem/sizeclass.h"
#include <atomic>
namespace snmalloc
{
struct Remote
{
using alloc_id_t = size_t;
union
{
std::atomic<Remote*> next;
Remote* non_atomic_next;
};
alloc_id_t allocator_id;
void set_target_id(alloc_id_t id)
{
allocator_id = id;
}
alloc_id_t target_id()
{
return allocator_id;
}
};
static_assert(
sizeof(Remote) <= MIN_ALLOC_SIZE,
"Needs to be able to fit in smallest allocation.");
struct RemoteAllocator
{
using alloc_id_t = Remote::alloc_id_t;
// Store the message queue on a separate cacheline. It is mutable data that
// is read by other threads.
alignas(CACHELINE_SIZE) MPSCQ<Remote> message_queue;
alloc_id_t id()
{
return static_cast<alloc_id_t>(
reinterpret_cast<uintptr_t>(&message_queue));
}
};
} // namespace snmalloc

3rdparty/snmalloc/src/mem/sizeclass.h

@ -1,171 +0,0 @@
#pragma once
#include "allocconfig.h"
namespace snmalloc
{
constexpr static uint16_t get_initial_bumpptr(uint8_t sc, bool is_short);
constexpr static uint16_t get_initial_link(uint8_t sc, bool is_short);
constexpr static size_t sizeclass_to_size(uint8_t sizeclass);
constexpr static size_t sizeclass_to_cache_friendly_mask(uint8_t sizeclass);
constexpr static size_t sizeclass_to_inverse_cache_friendly_mask(uint8_t sc);
constexpr static uint16_t medium_slab_free(uint8_t sizeclass);
static inline uint8_t size_to_sizeclass(size_t size)
{
// Don't use sizeclasses that are not a multiple of the alignment.
// For example, 24 byte allocations can be
// problematic for some data due to alignment issues.
return static_cast<uint8_t>(
bits::to_exp_mant<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(size));
}
constexpr static inline uint8_t size_to_sizeclass_const(size_t size)
{
// Don't use sizeclasses that are not a multiple of the alignment.
// For example, 24 byte allocations can be
// problematic for some data due to alignment issues.
return static_cast<uint8_t>(
bits::to_exp_mant_const<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(size));
}
constexpr static inline size_t large_sizeclass_to_size(uint8_t large_class)
{
return bits::one_at_bit(large_class + SUPERSLAB_BITS);
}
// Small classes range from [MIN, SLAB], i.e. inclusive.
static constexpr size_t NUM_SMALL_CLASSES =
size_to_sizeclass_const(bits::one_at_bit(SLAB_BITS)) + 1;
static constexpr size_t NUM_SIZECLASSES =
size_to_sizeclass_const(SUPERSLAB_SIZE);
// Medium classes range from (SLAB, SUPERSLAB), i.e. non-inclusive.
static constexpr size_t NUM_MEDIUM_CLASSES =
NUM_SIZECLASSES - NUM_SMALL_CLASSES;
// Large classes range from [SUPERSLAB, ADDRESS_SPACE).
static constexpr size_t NUM_LARGE_CLASSES =
bits::ADDRESS_BITS - SUPERSLAB_BITS;
inline static size_t round_by_sizeclass(size_t rsize, size_t offset)
{
// check_same<NUM_LARGE_CLASSES, Globals::num_large_classes>();
// Must be called with a rounded size.
assert(sizeclass_to_size(size_to_sizeclass(rsize)) == rsize);
// Only works up to certain offsets; exhaustively tested up to
// SUPERSLAB_SIZE.
assert(offset <= SUPERSLAB_SIZE);
size_t align = bits::ctz(rsize);
size_t divider = rsize >> align;
// Maximum of 24 bits for 16MiB super/medium slab
if (INTERMEDIATE_BITS == 0 || divider == 1)
{
assert(divider == 1);
return offset & ~(rsize - 1);
}
if constexpr (bits::is64() && INTERMEDIATE_BITS <= 2)
{
// Only works with 64-bit multiplication, as the following would overflow
// in 32 bits.
// The code uses reciprocal division with a shift of 26 bits, which is
// considerably more bits than we need in the result. If SUPERSLABS
// get larger then we should review this code.
static_assert(SUPERSLAB_BITS <= 24, "The following code assumes 24 bits");
static constexpr size_t shift = 26;
size_t back_shift = shift + align;
static constexpr size_t mul_shift = 1ULL << shift;
static constexpr uint32_t constants[8] = {0,
mul_shift,
0,
(mul_shift / 3) + 1,
0,
(mul_shift / 5) + 1,
0,
(mul_shift / 7) + 1};
return ((constants[divider] * offset) >> back_shift) * rsize;
}
else
// Use 32-bit division, as it is considerably faster than 64-bit, and
// everything fits into 32 bits here.
return static_cast<uint32_t>(offset / rsize) * rsize;
}
inline static bool is_multiple_of_sizeclass(size_t rsize, size_t offset)
{
// Must be called with a rounded size.
assert(sizeclass_to_size(size_to_sizeclass(rsize)) == rsize);
// Only works up to certain offsets; exhaustively tested up to
// SUPERSLAB_SIZE.
assert(offset <= SUPERSLAB_SIZE);
size_t align = bits::ctz(rsize);
size_t divider = rsize >> align;
// Maximum of 24 bits for 16MiB super/medium slab
if (INTERMEDIATE_BITS == 0 || divider == 1)
{
assert(divider == 1);
return (offset & (rsize - 1)) == 0;
}
if constexpr (bits::is64() && INTERMEDIATE_BITS <= 2)
{
// Only works with 64-bit multiplication, as the following would overflow
// in 32 bits.
// The code is using reciprocal division, with a shift of 26 bits, this
// is considerably more bits than we need in the result. If SUPERSLABS
// get larger then we should review this code.
static_assert(SUPERSLAB_BITS <= 24, "The following code assumes 24 bits");
static constexpr size_t shift = 31;
static constexpr size_t mul_shift = 1ULL << shift;
static constexpr uint32_t constants[8] = {0,
mul_shift,
0,
(mul_shift / 3) + 1,
0,
(mul_shift / 5) + 1,
0,
(mul_shift / 7) + 1};
// There is a long chain of zeros after the back-shift; however, not all
// bits are zero, so just check a range.
// This is exhaustively tested for the current use case.
return (((constants[divider] * offset)) &
(((1ULL << (align + 3)) - 1) << (shift - 3))) == 0;
}
else
// Use 32-bit division, as it is considerably faster than 64-bit, and
// everything fits into 32 bits here.
return static_cast<uint32_t>(offset % rsize) == 0;
}
#ifdef CACHE_FRIENDLY_OFFSET
inline static void* remove_cache_friendly_offset(void* p, uint8_t sizeclass)
{
size_t mask = sizeclass_to_inverse_cache_friendly_mask(sizeclass);
return p = (void*)((uintptr_t)p & mask);
}
inline static uint16_t
remove_cache_friendly_offset(uint16_t relative, uint8_t sizeclass)
{
size_t mask = sizeclass_to_inverse_cache_friendly_mask(sizeclass);
return relative & mask;
}
#else
inline static void* remove_cache_friendly_offset(void* p, uint8_t sizeclass)
{
UNUSED(sizeclass);
return p;
}
inline static uint16_t
remove_cache_friendly_offset(uint16_t relative, uint8_t sizeclass)
{
UNUSED(sizeclass);
return relative;
}
#endif
} // namespace snmalloc
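The constants in round_by_sizeclass rely on the fact that, for offsets below 2^24, multiplying by ceil(2^26 / d) and shifting right by 26 gives exact integer division by d for d in {3, 5, 7}. A standalone check of just that identity (my sketch, not snmalloc code):
#include <cassert>
#include <cstdint>
int main()
{
  constexpr uint64_t shift = 26;
  constexpr uint64_t mul_shift = uint64_t(1) << shift;
  constexpr uint64_t constants[8] = {
    0, mul_shift, 0, (mul_shift / 3) + 1, 0, (mul_shift / 5) + 1, 0, (mul_shift / 7) + 1};
  for (uint64_t divider : {3, 5, 7})
    for (uint64_t offset = 0; offset < (uint64_t(1) << 24); offset += 4093)
      assert(((constants[divider] * offset) >> shift) == offset / divider);
  return 0;
}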

3rdparty/snmalloc/src/mem/sizeclasstable.h

@ -1,115 +0,0 @@
#pragma once
#include "../ds/helpers.h"
#include "superslab.h"
namespace snmalloc
{
struct SizeClassTable
{
ModArray<NUM_SIZECLASSES, size_t> size;
ModArray<NUM_SIZECLASSES, size_t> cache_friendly_mask;
ModArray<NUM_SIZECLASSES, size_t> inverse_cache_friendly_mask;
ModArray<NUM_SMALL_CLASSES, uint16_t> bump_ptr_start;
ModArray<NUM_SMALL_CLASSES, uint16_t> short_bump_ptr_start;
ModArray<NUM_SMALL_CLASSES, uint16_t> initial_link_ptr;
ModArray<NUM_SMALL_CLASSES, uint16_t> short_initial_link_ptr;
ModArray<NUM_MEDIUM_CLASSES, uint16_t> medium_slab_slots;
constexpr SizeClassTable()
: size(),
cache_friendly_mask(),
inverse_cache_friendly_mask(),
bump_ptr_start(),
short_bump_ptr_start(),
initial_link_ptr(),
short_initial_link_ptr(),
medium_slab_slots()
{
for (uint8_t sizeclass = 0; sizeclass < NUM_SIZECLASSES; sizeclass++)
{
size[sizeclass] =
bits::from_exp_mant<INTERMEDIATE_BITS, MIN_ALLOC_BITS>(sizeclass);
size_t alignment = bits::min(
bits::one_at_bit(bits::ctz_const(size[sizeclass])), OS_PAGE_SIZE);
cache_friendly_mask[sizeclass] = (alignment - 1);
inverse_cache_friendly_mask[sizeclass] = ~(alignment - 1);
}
size_t header_size = sizeof(Superslab);
size_t short_slab_size = SLAB_SIZE - header_size;
for (uint8_t i = 0; i < NUM_SMALL_CLASSES; i++)
{
// We align to the end of the block to remove special cases for the
// short block. Calculate remainders
size_t short_correction = short_slab_size % size[i];
size_t correction = SLAB_SIZE % size[i];
// First element in the block is the link
initial_link_ptr[i] = static_cast<uint16_t>(correction);
short_initial_link_ptr[i] =
static_cast<uint16_t>(header_size + short_correction);
// Move to object after link.
auto short_after_link = short_initial_link_ptr[i] + size[i];
size_t after_link = initial_link_ptr[i] + size[i];
// Bump ptr has bottom bit set.
// In case we only have one object on this slab check for wrap around.
short_bump_ptr_start[i] =
static_cast<uint16_t>((short_after_link + 1) % SLAB_SIZE);
bump_ptr_start[i] = static_cast<uint16_t>((after_link + 1) % SLAB_SIZE);
}
for (uint8_t i = NUM_SMALL_CLASSES; i < NUM_SIZECLASSES; i++)
{
medium_slab_slots[i - NUM_SMALL_CLASSES] = static_cast<uint16_t>(
(SUPERSLAB_SIZE - Mediumslab::header_size()) / size[i]);
}
}
};
static constexpr SizeClassTable sizeclass_metadata = SizeClassTable();
static inline constexpr uint16_t
get_initial_bumpptr(uint8_t sc, bool is_short)
{
if (is_short)
return sizeclass_metadata.short_bump_ptr_start[sc];
return sizeclass_metadata.bump_ptr_start[sc];
}
static inline constexpr uint16_t get_initial_link(uint8_t sc, bool is_short)
{
if (is_short)
return sizeclass_metadata.short_initial_link_ptr[sc];
return sizeclass_metadata.initial_link_ptr[sc];
}
constexpr static inline size_t sizeclass_to_size(uint8_t sizeclass)
{
return sizeclass_metadata.size[sizeclass];
}
constexpr static inline size_t
sizeclass_to_cache_friendly_mask(uint8_t sizeclass)
{
return sizeclass_metadata.cache_friendly_mask[sizeclass];
}
constexpr static inline size_t
sizeclass_to_inverse_cache_friendly_mask(uint8_t sizeclass)
{
return sizeclass_metadata.inverse_cache_friendly_mask[sizeclass];
}
constexpr static inline uint16_t medium_slab_free(uint8_t sizeclass)
{
return sizeclass_metadata
.medium_slab_slots[(sizeclass - NUM_SMALL_CLASSES)];
}
} // namespace snmalloc

3rdparty/snmalloc/src/mem/slab.h

@ -1,166 +0,0 @@
#pragma once
#include "superslab.h"
namespace snmalloc
{
class Slab
{
private:
uint16_t pointer_to_index(void* p)
{
// Get the offset from the slab for a memory location.
return static_cast<uint16_t>(address_cast(p) - address_cast(this));
}
public:
static Slab* get(void* p)
{
return pointer_cast<Slab>(address_cast(p) & SLAB_MASK);
}
Metaslab& get_meta()
{
Superslab* super = Superslab::get(this);
return super->get_meta(this);
}
SlabLink* get_link()
{
return get_meta().get_link(this);
}
template<ZeroMem zero_mem, typename MemoryProvider>
void* alloc(SlabList* sc, size_t rsize, MemoryProvider& memory_provider)
{
// Read the head from the metadata stored in the superslab.
Metaslab& meta = get_meta();
uint16_t head = meta.head;
assert(rsize == sizeclass_to_size(meta.sizeclass));
meta.debug_slab_invariant(is_short(), this);
assert(sc->get_head() == (SlabLink*)((size_t)this + meta.link));
assert(!meta.is_full());
meta.add_use();
void* p;
if ((head & 1) == 0)
{
void* node = pointer_offset(this, head);
// Read the next slot from the memory that's about to be allocated.
uint16_t next = *static_cast<uint16_t*>(node);
meta.head = next;
p = remove_cache_friendly_offset(node, meta.sizeclass);
}
else
{
if (meta.head == 1)
{
p = pointer_offset(this, meta.link);
sc->pop();
meta.set_full();
}
else
{
// This slab is being bump allocated.
p = pointer_offset(this, head - 1);
meta.head = (head + static_cast<uint16_t>(rsize)) & (SLAB_SIZE - 1);
}
}
meta.debug_slab_invariant(is_short(), this);
if constexpr (zero_mem == YesZero)
{
if (rsize < PAGE_ALIGNED_SIZE)
memory_provider.zero(p, rsize);
else
memory_provider.template zero<true>(p, rsize);
}
return p;
}
bool is_start_of_object(Superslab* super, void* p)
{
Metaslab& meta = super->get_meta(this);
return is_multiple_of_sizeclass(
sizeclass_to_size(meta.sizeclass),
address_cast(this) + SLAB_SIZE - address_cast(p));
}
// Returns an Action indicating whether this alters the superslab's get_status.
template<typename MemoryProvider>
inline typename Superslab::Action dealloc(
SlabList* sc, Superslab* super, void* p, MemoryProvider& memory_provider)
{
Metaslab& meta = super->get_meta(this);
bool was_full = meta.is_full();
meta.debug_slab_invariant(is_short(), this);
meta.sub_use();
if (was_full)
{
// We are not on the sizeclass list.
if (!meta.is_unused())
{
// Update the head and the sizeclass link.
uint16_t index = pointer_to_index(p);
assert(meta.head == 1);
meta.link = index;
// Push on the list of slabs for this sizeclass.
sc->insert(meta.get_link(this));
meta.debug_slab_invariant(is_short(), this);
}
else
{
// Dealloc on the superslab.
if (is_short())
return super->dealloc_short_slab(memory_provider);
return super->dealloc_slab(this, memory_provider);
}
}
else if (meta.is_unused())
{
// Remove from the sizeclass list and dealloc on the superslab.
sc->remove(meta.get_link(this));
if (is_short())
return super->dealloc_short_slab(memory_provider);
return super->dealloc_slab(this, memory_provider);
}
else
{
#ifndef NDEBUG
sc->debug_check_contains(meta.get_link(this));
#endif
// Update the head and the next pointer in the free list.
uint16_t head = meta.head;
uint16_t current = pointer_to_index(p);
// Set the head to the memory being deallocated.
meta.head = current;
assert(meta.valid_head(is_short()));
// Set the next pointer to the previous head.
*static_cast<uint16_t*>(p) = head;
meta.debug_slab_invariant(is_short(), this);
}
return Superslab::NoSlabReturn;
}
bool is_short()
{
return (address_cast(this) & SUPERSLAB_MASK) == address_cast(this);
}
};
} // namespace snmalloc
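The free list that alloc and dealloc above thread through deallocated slots can be pictured with a small standalone sketch (mine, not snmalloc's): the first two bytes of a freed slot hold the 16-bit slab offset of the next free slot, and the metaslab's head points at the most recently freed one.
#include <cassert>
#include <cstdint>
#include <cstring>
int main()
{
  static unsigned char slab[65536]; // stand-in for a 64 KiB slab
  uint16_t head = 1;                // odd: still a bump pointer, nothing freed yet
  auto push = [&](uint16_t offset) {
    std::memcpy(&slab[offset], &head, sizeof(head)); // freed slot stores the old head
    head = offset;                                   // head now points at this slot
  };
  push(1024);
  push(2048);
  assert(head == 2048);
  uint16_t next;
  std::memcpy(&next, &slab[head], sizeof(next));
  assert(next == 1024);             // chained through the freed memory itself
  return 0;
}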

3rdparty/snmalloc/src/mem/slowalloc.h

@ -1,68 +0,0 @@
#include "globalalloc.h"
namespace snmalloc
{
/**
* RAII wrapper around an `Alloc`. This class gets an allocator from the
* global pool and wraps it so that `Alloc` methods can be called
* directly via the `->` operator on this class. When this object is
* destroyed, it returns the allocator to the global pool.
*
* This does not depend on thread-local storage working, so can be used for
* bootstrapping.
*/
struct SlowAllocator
{
/**
* The allocator that this wrapper will use.
*/
Alloc* alloc;
/**
* Constructor. Claims an allocator from the global pool
*/
SlowAllocator() : alloc(current_alloc_pool()->acquire()) {}
/**
* Copying is not supported, it could easily lead to accidental sharing of
* allocators.
*/
SlowAllocator(const SlowAllocator&) = delete;
/**
* Moving is not supported, though it would be easy to add if there's a use
* case for it.
*/
SlowAllocator(SlowAllocator&&) = delete;
/**
* Copying is not supported, it could easily lead to accidental sharing of
* allocators.
*/
SlowAllocator& operator=(const SlowAllocator&) = delete;
/**
* Moving is not supported, though it would be easy to add if there's a use
* case for it.
*/
SlowAllocator& operator=(SlowAllocator&&) = delete;
/**
* Destructor. Returns the allocator to the pool.
*/
~SlowAllocator()
{
current_alloc_pool()->release(alloc);
}
/**
* Arrow operator, allows methods exposed by `Alloc` to be called on the
* wrapper.
*/
Alloc* operator->()
{
return alloc;
}
};
/**
* Returns a new slow allocator. When the `SlowAllocator` goes out of scope,
* the underlying `Alloc` will be returned to the pool.
*/
inline SlowAllocator get_slow_allocator()
{
return SlowAllocator{};
}
} // namespace snmalloc
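A hedged usage sketch for this wrapper (assuming snmalloc's src/ directory is on the include path and a C++17 compiler); it is the same pattern the __je_bootstrap_* functions in malloc.cc below use.
#include "mem/slowalloc.h"
#include <cstddef>
// Allocate without relying on thread-local storage being initialised; the
// wrapped allocator is returned to the global pool when `a` goes out of scope.
void* bootstrap_buffer(std::size_t n)
{
  auto a = snmalloc::get_slow_allocator();
  return a->alloc(n);
}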

3rdparty/snmalloc/src/mem/superslab.h

@ -1,245 +0,0 @@
#pragma once
#include "../ds/helpers.h"
#include "allocslab.h"
#include "metaslab.h"
#include <new>
namespace snmalloc
{
class Superslab : public Allocslab
{
// This is the view of a 16 mb superslab when it is being used to allocate
// 64 kb slabs.
private:
friend DLList<Superslab>;
// Keep the allocator pointer on a separate cache line. It is read by
// other threads, and does not change, so we avoid false sharing.
alignas(CACHELINE_SIZE)
// The superslab is kept on a doubly linked list of superslabs which
// have some space.
Superslab* next;
Superslab* prev;
// This is a reference to the first unused slab in the free slab list.
// It does not contain the short slab, which is handled using a bit
// in the "used" field below. The list is terminated by pointing to
// the short slab.
// The head linked list has an absolute pointer for head, but the next
// pointers stored in the metaslabs are relative pointers, that is, they
// are the relative offset to the next entry minus 1. This means that
// all zeros is a list that chains through all the blocks, so the zero
// initialised memory requires no more work.
Mod<SLAB_COUNT, uint8_t> head;
// Represents twice the number of full size slabs used
// plus 1 for the short slab. i.e. using 3 slabs and the
// short slab would be 6 + 1 = 7
uint16_t used;
ModArray<SLAB_COUNT, Metaslab> meta;
// size_t is used as the return type, as it results in better code with MSVC
size_t slab_to_index(Slab* slab)
{
auto res = ((address_cast(slab) - address_cast(this)) >> SLAB_BITS);
assert(res == (uint8_t)res);
return res;
}
public:
enum Status
{
Full,
Available,
OnlyShortSlabAvailable,
Empty
};
enum Action
{
NoSlabReturn = 0,
NoStatusChange = 1,
StatusChange = 2
};
static Superslab* get(void* p)
{
return pointer_cast<Superslab>(address_cast(p) & SUPERSLAB_MASK);
}
static bool is_short_sizeclass(uint8_t sizeclass)
{
constexpr uint8_t h = size_to_sizeclass_const(sizeof(Superslab));
return sizeclass <= h;
}
void init(RemoteAllocator* alloc)
{
allocator = alloc;
if (kind != Super)
{
// If this wasn't previously a Superslab, we need to set up the
// header.
kind = Super;
// Point head at the first non-short slab.
head = 1;
if (kind != Fresh)
{
// If this wasn't previously Fresh, we need to zero some things.
used = 0;
for (size_t i = 0; i < SLAB_COUNT; i++)
{
new (&(meta[i])) Metaslab();
}
}
#ifndef NDEBUG
auto curr = head;
for (size_t i = 0; i < SLAB_COUNT - used - 1; i++)
{
curr = (curr + meta[curr].next + 1) & (SLAB_COUNT - 1);
}
assert(curr == 0);
for (size_t i = 0; i < SLAB_COUNT; i++)
{
assert(meta[i].is_unused());
}
#endif
}
}
bool is_empty()
{
return used == 0;
}
bool is_full()
{
return (used == (((SLAB_COUNT - 1) << 1) + 1));
}
bool is_almost_full()
{
return (used >= ((SLAB_COUNT - 1) << 1));
}
Status get_status()
{
if (!is_almost_full())
{
if (!is_empty())
{
return Available;
}
return Empty;
}
if (!is_full())
{
return OnlyShortSlabAvailable;
}
return Full;
}
Metaslab& get_meta(Slab* slab)
{
return meta[slab_to_index(slab)];
}
template<typename MemoryProvider>
Slab* alloc_short_slab(uint8_t sizeclass, MemoryProvider& memory_provider)
{
if ((used & 1) == 1)
return alloc_slab(sizeclass, memory_provider);
meta[0].head = get_initial_bumpptr(sizeclass, true);
meta[0].sizeclass = sizeclass;
meta[0].link = get_initial_link(sizeclass, true);
if constexpr (decommit_strategy == DecommitAll)
{
memory_provider.template notify_using<NoZero>(
pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
}
used++;
return (Slab*)this;
}
template<typename MemoryProvider>
Slab* alloc_slab(uint8_t sizeclass, MemoryProvider& memory_provider)
{
uint8_t h = head;
Slab* slab = pointer_cast<Slab>(
address_cast(this) + (static_cast<size_t>(h) << SLAB_BITS));
uint8_t n = meta[h].next;
meta[h].head = get_initial_bumpptr(sizeclass, false);
meta[h].sizeclass = sizeclass;
meta[h].link = get_initial_link(sizeclass, false);
head = h + n + 1;
used += 2;
if constexpr (decommit_strategy == DecommitAll)
{
memory_provider.template notify_using<NoZero>(slab, SLAB_SIZE);
}
return slab;
}
// Returns StatusChange if this alters the value of get_status
template<typename MemoryProvider>
Action dealloc_slab(Slab* slab, MemoryProvider& memory_provider)
{
// This is not the short slab.
uint8_t index = static_cast<uint8_t>(slab_to_index(slab));
uint8_t n = head - index - 1;
meta[index].sizeclass = 0;
meta[index].next = n;
head = index;
bool was_almost_full = is_almost_full();
used -= 2;
if constexpr (decommit_strategy == DecommitAll)
memory_provider.notify_not_using(slab, SLAB_SIZE);
assert(meta[index].is_unused());
if (was_almost_full || is_empty())
return StatusChange;
return NoStatusChange;
}
// Returns StatusChange if this alters the value of get_status
template<typename MemoryProvider>
Action dealloc_short_slab(MemoryProvider& memory_provider)
{
// This is the short slab.
if constexpr (decommit_strategy == DecommitAll)
{
memory_provider.notify_not_using(
pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
}
bool was_full = is_full();
used--;
assert(meta[0].is_unused());
if (was_full || is_empty())
return StatusChange;
return NoStatusChange;
}
};
} // namespace snmalloc
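A standalone illustration of the used encoding documented above (my sketch; SLAB_COUNT = 256 is assumed, i.e. a 16 MiB superslab of 64 KiB slabs): full-size slabs count for two and the short slab counts for one.
#include <cassert>
int main()
{
  constexpr unsigned SLAB_COUNT = 256;                 // assumed: 16 MiB / 64 KiB
  unsigned used = 3 * 2 + 1;                           // three full slabs plus the short slab
  assert(used == 7);                                   // matches the "6 + 1 = 7" comment above
  bool full = used == ((SLAB_COUNT - 1) << 1) + 1;     // 511: everything in use
  bool almost_full = used >= ((SLAB_COUNT - 1) << 1);  // 510: only the short slab is free
  assert(!full && !almost_full);
  return 0;
}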

3rdparty/snmalloc/src/mem/threadalloc.h

@ -1,313 +0,0 @@
#pragma once
#include "../ds/helpers.h"
#include "globalalloc.h"
#if defined(SNMALLOC_USE_THREAD_DESTRUCTOR) && \
defined(SNMALLOC_USE_THREAD_CLEANUP)
#error At most one out of SNMALLOC_USE_THREAD_CLEANUP and SNMALLOC_USE_THREAD_DESTRUCTOR may be defined.
#endif
#if !defined(_WIN32) && !defined(FreeBSD_KERNEL)
# include "pthread.h"
#endif
namespace snmalloc
{
extern "C" void _malloc_thread_cleanup(void);
#ifdef SNMALLOC_EXTERNAL_THREAD_ALLOC
/**
* Version of the `ThreadAlloc` interface that does no management of thread
* local state, and just assumes that "ThreadAllocUntyped::get" has been
* declared before including snmalloc.h. As it is included before, it cannot
* know the allocator type, hence the casting.
*
* This class is used only when snmalloc is compiled as part of a runtime,
* which has its own management of the thread local allocator pointer.
*/
class ThreadAllocUntypedWrapper
{
public:
static inline Alloc*& get()
{
return (Alloc*&)ThreadAllocUntyped::get();
}
};
#endif
/**
* Version of the `ThreadAlloc` interface that uses a hook provided by libc
* to destroy thread-local state. This is the ideal option, because it
* enforces ordering of destruction such that the malloc state is destroyed
* after anything that can allocate memory.
*
* This class is used only when snmalloc is compiled as part of a compatible
* libc (for example, FreeBSD libc).
*/
class ThreadAllocLibcCleanup
{
/**
* Libc will call `_malloc_thread_cleanup` just before a thread terminates.
* This function must be allowed to call back into this class to destroy
* the state.
*/
friend void _malloc_thread_cleanup(void);
/**
* Function called when the thread exits. This is guaranteed to be called
* precisely once per thread and releases the current allocator.
*/
static inline void exit()
{
if (auto* per_thread = get(false))
{
current_alloc_pool()->release(per_thread);
per_thread = nullptr;
}
}
public:
/**
* Returns a pointer to the allocator associated with this thread. If
* `create` is true, it will create an allocator if one does not exist,
* otherwise it will return `nullptr` in this case. This should be called
* with `create == false` only during thread teardown.
*
* The non-create case exists so that the `per_thread` variable can be a
* local static and not a global, allowing ODR to deduplicate it.
*/
static inline Alloc*& get(bool create = true)
{
static thread_local Alloc* per_thread;
if (!per_thread && create)
{
per_thread = current_alloc_pool()->acquire();
}
return per_thread;
}
};
/**
* Version of the `ThreadAlloc` interface that uses C++ `thread_local`
* destructors for cleanup. If a per-thread allocator is used during the
* destruction of other per-thread data, this class will create a new
* instance and register its destructor, so should eventually result in
* cleanup, but may result in allocators being returned to the global pool
* and then reacquired multiple times.
*
* This implementation depends on nothing outside of a working C++
* environment and so should be the simplest for initial bringup on an
* unsupported platform. It is currently used in the FreeBSD kernel version.
*/
class ThreadAllocThreadDestructor
{
/**
* A pointer to the allocator owned by this thread.
*/
Alloc* alloc;
/**
* Constructor. Acquires a new allocator and associates it with this
* object. There should be only one instance of this class per thread.
*/
ThreadAllocThreadDestructor() : alloc(current_alloc_pool()->acquire()) {}
/**
* Destructor. Releases the allocator owned by this thread.
*/
~ThreadAllocThreadDestructor()
{
current_alloc_pool()->release(alloc);
}
public:
/**
* Public interface, returns the allocator for this thread, constructing
* one if necessary.
*/
static inline Alloc*& get()
{
static thread_local ThreadAllocThreadDestructor per_thread;
return per_thread.alloc;
}
};
// When targeting the FreeBSD kernel, the pthread header exists, but the
// pthread symbols do not, so don't compile this because it will fail to
// link.
#ifndef FreeBSD_KERNEL
/**
* Version of the `ThreadAlloc` interface that uses thread-specific (POSIX
* threads) or Fiber-local (Windows) storage with an explicit destructor.
* Neither of the underlying mechanisms guarantees ordering, so the cleanup
* may be called before other cleanup functions or thread-local destructors.
*
* This implementation is used when using snmalloc as a library
* implementation of malloc, but not when embedding it in a C standard library.
* Using this implementation removes the dependency on a C++ runtime library.
*/
class ThreadAllocExplicitTLSCleanup
{
/**
* Cleanup function. This is registered with the operating system's
* thread- or fibre-local storage subsystem to clean up the per-thread
* allocator.
*/
static inline void
# ifdef _WIN32
NTAPI
# endif
thread_alloc_release(void* p)
{
Alloc** pp = static_cast<Alloc**>(p);
current_alloc_pool()->release(*pp);
*pp = nullptr;
}
# ifdef _WIN32
/**
* Key type used to identify fibre-local storage.
*/
using tls_key_t = DWORD;
/**
* On Windows, construct a new fibre-local storage allocation. This
* function must not be called more than once.
*/
static inline tls_key_t tls_key_create() noexcept
{
return FlsAlloc(thread_alloc_release);
}
/**
* On Windows, store a pointer to a `thread_local` pointer to an allocator
* into fibre-local storage. This function takes a pointer to the
* `thread_local` allocation, rather than to the pointee, so that the
* cleanup function can zero the pointer.
*
* This must not be called until after `tls_key_create` has returned.
*/
static inline void tls_set_value(tls_key_t key, Alloc** value)
{
FlsSetValue(key, static_cast<void*>(value));
}
# else
/**
* Key type used for thread-specific storage.
*/
using tls_key_t = pthread_key_t;
/**
* On POSIX systems, construct a new thread-specific storage allocation.
* This function must not be called more than once.
*/
static inline tls_key_t tls_key_create() noexcept
{
tls_key_t key;
pthread_key_create(&key, thread_alloc_release);
return key;
}
/**
* On POSIX systems, store a pointer to a `thread_local` pointer to an
* allocator into thread-specific storage. This function takes a pointer to
* the `thread_local` allocation, rather than to the pointee, so that the
* cleanup function can zero the pointer.
*
* This must not be called until after `tls_key_create` has returned.
*/
static inline void tls_set_value(tls_key_t key, Alloc** value)
{
pthread_setspecific(key, static_cast<void*>(value));
}
# endif
/**
* Private accessor to the per thread allocator
* Provides no checking for initialization
*/
static ALWAYSINLINE Alloc*& inner_get()
{
static thread_local Alloc* per_thread;
return per_thread;
}
# ifdef USE_SNMALLOC_STATS
static void print_stats()
{
Stats s;
current_alloc_pool()->aggregate_stats(s);
s.print<Alloc>(std::cout);
}
# endif
/**
* Private initialiser for the per thread allocator
*/
static NOINLINE Alloc*& inner_init()
{
Alloc*& per_thread = inner_get();
// If we don't have an allocator, construct one.
if (!per_thread)
{
// Construct the allocator and assign it to `per_thread` *before* doing
// anything else. This is important because `tls_key_create` may
// allocate memory and if we are providing the `malloc` implementation
// then this function must be re-entrant within a single thread. In
// this case, the second call to this function will simply return the
// allocator.
per_thread = current_alloc_pool()->acquire();
bool first = false;
tls_key_t key = Singleton<tls_key_t, tls_key_create>::get(&first);
// Associate the new allocator with the destructor.
tls_set_value(key, &per_thread);
# ifdef USE_SNMALLOC_STATS
// Allocator is up and running now, safe to call atexit.
if (first)
{
atexit(print_stats);
}
# else
UNUSED(first);
# endif
}
return per_thread;
}
public:
/**
* Public interface, returns the allocator for the current thread,
* constructing it if necessary.
*/
static ALWAYSINLINE Alloc*& get()
{
Alloc*& per_thread = inner_get();
if (per_thread != nullptr)
return per_thread;
// Slow path that performs initialization
return inner_init();
}
};
#endif
#ifdef SNMALLOC_USE_THREAD_CLEANUP
/**
* Entry point the allows libc to call into the allocator for per-thread
* cleanup.
*/
extern "C" void _malloc_thread_cleanup(void)
{
ThreadAllocLibcCleanup::exit();
}
using ThreadAlloc = ThreadAllocLibcCleanup;
#elif defined(SNMALLOC_USE_THREAD_DESTRUCTOR)
using ThreadAlloc = ThreadAllocThreadDestructor;
#elif defined(SNMALLOC_EXTERNAL_THREAD_ALLOC)
using ThreadAlloc = ThreadAllocUntypedWrapper;
#else
using ThreadAlloc = ThreadAllocExplicitTLSCleanup;
#endif
} // namespace snmalloc

3rdparty/snmalloc/src/override/malloc.cc

@ -1,269 +0,0 @@
#include "../mem/slowalloc.h"
#include "../snmalloc.h"
#include <errno.h>
#include <string.h>
using namespace snmalloc;
#ifndef SNMALLOC_EXPORT
# define SNMALLOC_EXPORT
#endif
#ifndef SNMALLOC_NAME_MANGLE
# define SNMALLOC_NAME_MANGLE(a) a
#endif
extern "C"
{
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr)
{
return Alloc::external_pointer<OnePastEnd>(ptr);
}
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size)
{
// Include size 0 in the first sizeclass.
size = ((size - 1) >> (bits::BITS - 1)) + size;
return ThreadAlloc::get()->alloc(size);
}
SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr)
{
if (ptr == nullptr)
return;
ThreadAlloc::get()->dealloc(ptr);
}
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size)
{
bool overflow = false;
size_t sz = bits::umul(size, nmemb, overflow);
if (overflow)
{
errno = ENOMEM;
return nullptr;
}
// Include size 0 in the first sizeclass.
sz = ((sz - 1) >> (bits::BITS - 1)) + sz;
return ThreadAlloc::get()->alloc<ZeroMem::YesZero>(sz);
}
SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)(void* ptr)
{
return Alloc::alloc_size(ptr);
}
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size)
{
if (size == (size_t)-1)
{
errno = ENOMEM;
return nullptr;
}
if (ptr == nullptr)
{
return SNMALLOC_NAME_MANGLE(malloc)(size);
}
if (size == 0)
{
SNMALLOC_NAME_MANGLE(free)(ptr);
return nullptr;
}
#ifndef NDEBUG
// This check is redundant, because the check in memcpy will fail if this
// is skipped, but it's useful for debugging.
if (Alloc::external_pointer<Start>(ptr) != ptr)
{
error(
"Calling realloc on pointer that is not to the start of an allocation");
}
#endif
size_t sz = Alloc::alloc_size(ptr);
// Keep the current allocation if the given size is in the same sizeclass.
if (sz == sizeclass_to_size(size_to_sizeclass(size)))
return ptr;
void* p = SNMALLOC_NAME_MANGLE(malloc)(size);
if (p != nullptr)
{
assert(p == Alloc::external_pointer<Start>(p));
sz = bits::min(size, sz);
memcpy(p, ptr, sz);
SNMALLOC_NAME_MANGLE(free)(ptr);
}
return p;
}
#ifndef __FreeBSD__
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size)
{
bool overflow = false;
size_t sz = bits::umul(size, nmemb, overflow);
if (overflow)
{
errno = ENOMEM;
return nullptr;
}
return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz);
}
#endif
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size)
{
assert((size % alignment) == 0);
(void)alignment;
return SNMALLOC_NAME_MANGLE(malloc)(size);
}
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size)
{
if (
(alignment == 0) || (alignment == size_t(-1)) ||
(alignment > SUPERSLAB_SIZE))
{
errno = EINVAL;
return nullptr;
}
if ((size + alignment) < size)
{
errno = ENOMEM;
return nullptr;
}
size = bits::max(size, alignment);
uint8_t sc = size_to_sizeclass(size);
if (sc >= NUM_SIZECLASSES)
{
// large allocs are 16M aligned.
return SNMALLOC_NAME_MANGLE(malloc)(size);
}
for (; sc < NUM_SIZECLASSES; sc++)
{
size = sizeclass_to_size(sc);
if ((size & (~size + 1)) >= alignment)
{
return SNMALLOC_NAME_MANGLE(aligned_alloc)(alignment, size);
}
}
return SNMALLOC_NAME_MANGLE(malloc)(SUPERSLAB_SIZE);
}
SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)(
void** memptr, size_t alignment, size_t size)
{
if (
((alignment % sizeof(uintptr_t)) != 0) ||
((alignment & (alignment - 1)) != 0) || (alignment == 0))
{
return EINVAL;
}
void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size);
if (p == nullptr)
{
return ENOMEM;
}
*memptr = p;
return 0;
}
#ifndef __FreeBSD__
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size)
{
return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size);
}
#endif
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size)
{
if (size == size_t(-1))
{
errno = ENOMEM;
return nullptr;
}
return SNMALLOC_NAME_MANGLE(memalign)(
OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1));
}
// Stub implementations for jemalloc compatibility.
// These are called by FreeBSD's libthr (pthreads) to notify malloc of
// various events. They are currently unused, though we may wish to reset
// statistics on fork if built with statistics.
SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_prefork)(void) {}
SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_postfork)(void) {}
SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_first_thread)(void) {}
SNMALLOC_EXPORT int
SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t)
{
return ENOENT;
}
#ifdef SNMALLOC_EXPOSE_PAGEMAP
/**
* Export the pagemap. The return value is a pointer to the pagemap
* structure. The argument is used to return a pointer to a `PagemapConfig`
* structure describing the type of the pagemap. Static methods on the
* concrete pagemap templates can then be used to safely cast the return from
* this function to the correct type. This allows us to preserve some
* semblance of ABI safety via a pure C API.
*/
SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(snmalloc_pagemap_global_get)(
PagemapConfig const** config)
{
if (config)
{
*config = &decltype(snmalloc::global_pagemap)::config;
assert(
decltype(snmalloc::global_pagemap)::cast_to_pagemap(
&snmalloc::global_pagemap, *config) == &snmalloc::global_pagemap);
}
return &snmalloc::global_pagemap;
}
#endif
#ifdef SNMALLOC_EXPOSE_RESERVE
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(snmalloc_reserve_shared)(size_t* size, size_t align)
{
return snmalloc::default_memory_provider.reserve<true>(size, align);
}
#endif
#if !defined(__PIC__) && !defined(NO_BOOTSTRAP_ALLOCATOR)
// The following functions are required to work before TLS is set up, in
// statically-linked programs. These temporarily grab an allocator from the
// pool and return it.
void* __je_bootstrap_malloc(size_t size)
{
return get_slow_allocator()->alloc(size);
}
void* __je_bootstrap_calloc(size_t nmemb, size_t size)
{
bool overflow = false;
size_t sz = bits::umul(size, nmemb, overflow);
if (overflow)
{
errno = ENOMEM;
return nullptr;
}
// Include size 0 in the first sizeclass.
sz = ((sz - 1) >> (bits::BITS - 1)) + sz;
return get_slow_allocator()->alloc<ZeroMem::YesZero>(sz);
}
void __je_bootstrap_free(void* ptr)
{
get_slow_allocator()->dealloc(ptr);
}
#endif
}
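The branch-free ((size - 1) >> (bits::BITS - 1)) + size adjustment used by malloc and calloc above maps a zero-byte request into the first sizeclass without a conditional. A standalone check (BITS here stands for the word size in bits; not snmalloc code):
#include <cassert>
#include <cstddef>
std::size_t adjust(std::size_t size)
{
  constexpr std::size_t BITS = sizeof(std::size_t) * 8;
  // For size == 0, (size - 1) wraps to SIZE_MAX, whose top bit shifts down to 1.
  return ((size - 1) >> (BITS - 1)) + size;
}
int main()
{
  assert(adjust(0) == 1);       // malloc(0) behaves like malloc(1)
  assert(adjust(1) == 1);
  assert(adjust(4096) == 4096); // non-zero sizes are unchanged
  return 0;
}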

3rdparty/snmalloc/src/override/new.cc

@ -1,67 +0,0 @@
#include "../mem/alloc.h"
#include "../mem/threadalloc.h"
#include "../snmalloc.h"
#ifdef _WIN32
# define EXCEPTSPEC
#else
# ifdef _GLIBCXX_USE_NOEXCEPT
# define EXCEPTSPEC _GLIBCXX_USE_NOEXCEPT
# elif defined(_NOEXCEPT)
# define EXCEPTSPEC _NOEXCEPT
# else
# define EXCEPTSPEC
# endif
#endif
using namespace snmalloc;
void* operator new(size_t size)
{
return ThreadAlloc::get()->alloc(size);
}
void* operator new[](size_t size)
{
return ThreadAlloc::get()->alloc(size);
}
void* operator new(size_t size, std::nothrow_t&)
{
return ThreadAlloc::get()->alloc(size);
}
void* operator new[](size_t size, std::nothrow_t&)
{
return ThreadAlloc::get()->alloc(size);
}
void operator delete(void* p)EXCEPTSPEC
{
ThreadAlloc::get()->dealloc(p);
}
void operator delete(void* p, size_t size)EXCEPTSPEC
{
ThreadAlloc::get()->dealloc(p, size);
}
void operator delete(void* p, std::nothrow_t&)
{
ThreadAlloc::get()->dealloc(p);
}
void operator delete[](void* p) EXCEPTSPEC
{
ThreadAlloc::get()->dealloc(p);
}
void operator delete[](void* p, size_t size) EXCEPTSPEC
{
ThreadAlloc::get()->dealloc(p, size);
}
void operator delete[](void* p, std::nothrow_t&)
{
ThreadAlloc::get()->dealloc(p);
}

3rdparty/snmalloc/src/pal/pal.h

@ -1,53 +0,0 @@
#pragma once
#include "pal_consts.h"
namespace snmalloc
{
void error(const char* const str);
} // namespace snmalloc
// If simulating OE, then we need the underlying platform
#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION)
# include "pal_apple.h"
# include "pal_free_bsd_kernel.h"
# include "pal_freebsd.h"
# include "pal_linux.h"
# include "pal_windows.h"
#endif
#include "pal_open_enclave.h"
#include "pal_plain.h"
namespace snmalloc
{
#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION)
using DefaultPal =
# if defined(_WIN32)
PALWindows;
# elif defined(__APPLE__)
PALApple;
# elif defined(__linux__)
PALLinux;
# elif defined(FreeBSD_KERNEL)
PALFreeBSDKernel;
# elif defined(__FreeBSD__)
PALFBSD;
# else
# error Unsupported platform
# endif
#endif
using Pal =
#if defined(SNMALLOC_MEMORY_PROVIDER)
PALPlainMixin<SNMALLOC_MEMORY_PROVIDER>;
#elif defined(OPEN_ENCLAVE)
PALPlainMixin<PALOpenEnclave>;
#else
DefaultPal;
#endif
inline void error(const char* const str)
{
Pal::error(str);
}
} // namespace snmalloc

3rdparty/snmalloc/src/pal/pal_apple.h

@ -1,89 +0,0 @@
#pragma once
#ifdef __APPLE__
# include "../ds/bits.h"
# include "../mem/allocconfig.h"
# include <pthread.h>
# include <strings.h>
# include <sys/mman.h>
extern "C" int puts(const char* str);
namespace snmalloc
{
/**
* PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...).
*/
class PALApple
{
public:
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports.
*/
static constexpr uint64_t pal_features = 0;
static void error(const char* const str)
{
puts(str);
abort();
}
/// Notify platform that we will not be using these pages
void notify_not_using(void* p, size_t size) noexcept
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
madvise(p, size, MADV_FREE);
}
/// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size) noexcept
{
assert(
bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
if constexpr (zero_mem == YesZero)
zero(p, size);
}
/// OS specific function for zeroing memory
template<bool page_aligned = false>
void zero(void* p, size_t size) noexcept
{
if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
void* r = mmap(
p,
size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-1,
0);
if (r != MAP_FAILED)
return;
}
bzero(p, size);
}
template<bool committed>
void* reserve(size_t* size) noexcept
{
void* p = mmap(
NULL,
*size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0);
if (p == MAP_FAILED)
error("Out of memory");
return p;
}
};
}
#endif

3rdparty/snmalloc/src/pal/pal_consts.h

@ -1,45 +0,0 @@
#pragma once
namespace snmalloc
{
/**
* Flags in a bitfield of optional features that a PAL may support. These
* should be set in the PAL's `pal_features` static constexpr field.
*/
enum PalFeatures : uint64_t
{
/**
* This PAL supports low memory notifications. It must implement a
* `low_memory_epoch` method that returns a `uint64_t` of the number of
* times that a low-memory notification has been raised and an
* `expensive_low_memory_check()` method that returns a `bool` indicating
* whether low memory conditions are still in effect.
*/
LowMemoryNotification = (1 << 0),
/**
* This PAL natively supports allocation with a guaranteed alignment. If
* this is not supported, then we will over-allocate and round the
* allocation.
*
* A PAL that supports this must expose a `reserve()` method that takes
* a size and alignment. A PAL that does *not* support it must expose a
* `reserve()` method that takes only a size.
*/
AlignedAllocation = (1 << 1)
};
/**
* Flag indicating whether requested memory should be zeroed.
*/
enum ZeroMem
{
/**
* Memory should not be zeroed, contents are undefined.
*/
NoZero,
/**
* Memory must be zeroed. This can be lazily allocated via a copy-on-write
* mechanism as long as any load from the memory returns zero.
*/
YesZero
};
} // namespace snmalloc
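
Feature support is meant to be tested as a plain bitwise check against a PAL's `pal_features` constant. A minimal sketch (not project code; `Pal` refers to the alias selected in pal.h above, assuming these headers are included):

// Does the selected PAL hand out aligned reservations natively?
template<class PAL>
constexpr bool supports_aligned_allocation()
{
  return (PAL::pal_features & AlignedAllocation) != 0;
}

// e.g. supports_aligned_allocation<Pal>() decides whether the memory provider
// must over-allocate and round, as described in the comment above.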

@@ -1,95 +0,0 @@
#pragma once
#include "../ds/bits.h"
#include "../mem/allocconfig.h"
#if defined(FreeBSD_KERNEL)
extern "C"
{
# include <sys/vmem.h>
# include <vm/vm.h>
# include <vm/vm_extern.h>
# include <vm/vm_kern.h>
# include <vm/vm_object.h>
# include <vm/vm_param.h>
}
namespace snmalloc
{
class PALFreeBSDKernel
{
vm_offset_t get_vm_offset(uint_ptr_t p)
{
return static_cast<vm_offset_t>(reinterpret_cast<uintptr_t>(p));
}
public:
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports.
*/
static constexpr uint64_t pal_features = AlignedAllocation;
void error(const char* const str)
{
panic("snmalloc error: %s", str);
}
/// Notify platform that we will not be using these pages
void notify_not_using(void* p, size_t size)
{
vm_offset_t addr = get_vm_offset(p);
kmem_unback(kernel_object, addr, size);
}
/// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size)
{
vm_offset_t addr = get_vm_offset(p);
int flags = M_WAITOK | ((zero_mem == YesZero) ? M_ZERO : 0);
if (kmem_back(kernel_object, addr, size, flags) != KERN_SUCCESS)
{
error("Out of memory");
}
}
/// OS specific function for zeroing memory
template<bool page_aligned = false>
void zero(void* p, size_t size)
{
::bzero(p, size);
}
template<bool committed>
void* reserve(size_t* size, size_t align)
{
size_t request = *size;
vm_offset_t addr;
if (vmem_xalloc(
kernel_arena,
request,
align,
0,
0,
VMEM_ADDR_MIN,
VMEM_ADDR_MAX,
M_BESTFIT,
&addr))
{
return nullptr;
}
if (committed)
{
if (
kmem_back(kernel_object, addr, request, M_ZERO | M_WAITOK) !=
KERN_SUCCESS)
{
vmem_xfree(kernel_arena, addr, request);
return nullptr;
}
}
return get_vm_offset(addr);
}
};
}
#endif

95
3rdparty/snmalloc/src/pal/pal_freebsd.h vendored

@@ -1,95 +0,0 @@
#pragma once
#if defined(__FreeBSD__) && !defined(_KERNEL)
# include "../ds/bits.h"
# include "../mem/allocconfig.h"
# include <stdio.h>
# include <strings.h>
# include <sys/mman.h>
namespace snmalloc
{
class PALFBSD
{
public:
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports.
*/
static constexpr uint64_t pal_features = AlignedAllocation;
static void error(const char* const str)
{
puts(str);
abort();
}
/// Notify platform that we will not be using these pages
void notify_not_using(void* p, size_t size) noexcept
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
madvise(p, size, MADV_FREE);
}
/// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size) noexcept
{
assert(
bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
if constexpr (zero_mem == YesZero)
zero(p, size);
}
/// OS specific function for zeroing memory
template<bool page_aligned = false>
void zero(void* p, size_t size) noexcept
{
if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
void* r = mmap(
p,
size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-1,
0);
if (r != MAP_FAILED)
return;
}
bzero(p, size);
}
template<bool committed>
void* reserve(const size_t* size, size_t align) noexcept
{
size_t request = *size;
// Alignment must be a power of 2.
assert(align == bits::next_pow2(align));
if (align == 0)
{
align = 1;
}
size_t log2align = bits::next_pow2_bits(align);
void* p = mmap(
nullptr,
request,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(log2align),
-1,
0);
if (p == MAP_FAILED)
error("Out of memory");
return p;
}
};
} // namespace snmalloc
#endif

87
3rdparty/snmalloc/src/pal/pal_linux.h vendored

@@ -1,87 +0,0 @@
#pragma once
#if defined(__linux__)
# include "../ds/bits.h"
# include "../mem/allocconfig.h"
# include <string.h>
# include <sys/mman.h>
extern "C" int puts(const char* str);
namespace snmalloc
{
class PALLinux
{
public:
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports.
*/
static constexpr uint64_t pal_features = 0;
static void error(const char* const str)
{
puts(str);
abort();
}
/// Notify platform that we will not be using these pages
void notify_not_using(void* p, size_t size) noexcept
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
// Do nothing. Don't call madvise here, as the system call slows the
// allocator down too much.
UNUSED(p);
UNUSED(size);
}
/// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size) noexcept
{
assert(
bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
if constexpr (zero_mem == YesZero)
zero<true>(p, size);
else
{
UNUSED(p);
UNUSED(size);
}
}
/// OS specific function for zeroing memory
template<bool page_aligned = false>
void zero(void* p, size_t size) noexcept
{
if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
madvise(p, size, MADV_DONTNEED);
}
else
{
::memset(p, 0, size);
}
}
template<bool committed>
void* reserve(const size_t* size) noexcept
{
void* p = mmap(
nullptr,
*size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0);
if (p == MAP_FAILED)
error("Out of memory");
return p;
}
};
} // namespace snmalloc
#endif

63
3rdparty/snmalloc/src/pal/pal_open_enclave.h vendored

@@ -1,63 +0,0 @@
#pragma once
#include "pal_plain.h"
#ifdef OPEN_ENCLAVE
extern "C" const void* __oe_get_heap_base();
extern "C" const void* __oe_get_heap_end();
extern "C" void* oe_memset(void* p, int c, size_t size);
extern "C" void oe_abort();
namespace snmalloc
{
class PALOpenEnclave
{
std::atomic<uintptr_t> oe_base;
public:
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports.
*/
static constexpr uint64_t pal_features = AlignedAllocation;
static void error(const char* const str)
{
UNUSED(str);
oe_abort();
}
template<bool committed>
void* reserve(size_t* size, size_t align) noexcept
{
if (oe_base == 0)
{
uintptr_t dummy = 0;
oe_base.compare_exchange_strong(dummy, (uintptr_t)__oe_get_heap_base());
}
uintptr_t old_base = oe_base;
uintptr_t old_base2 = old_base;
uintptr_t next_base;
auto end = (uintptr_t)__oe_get_heap_end();
do
{
old_base2 = old_base;
auto new_base = bits::align_up(old_base, align);
next_base = new_base + *size;
if (next_base > end)
error("Out of memory");
} while (!oe_base.compare_exchange_strong(old_base, next_base));
*size = next_base - old_base2;
return (void*)old_base;
}
template<bool page_aligned = false>
void zero(void* p, size_t size) noexcept
{
oe_memset(p, 0, size);
}
};
}
#endif
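
The reserve above is a lock-free bump allocator over the fixed enclave heap: align the current base, compute the next base, and retry the compare-exchange until this thread wins. The same pattern in isolation, as a hedged standalone sketch using plain C++ atomics (names are invented; `base` stands in for oe_base, `heap_end` for __oe_get_heap_end(), and align is assumed to be a power of two):

#include <atomic>
#include <cstddef>
#include <cstdint>

inline void* bump_reserve(
  std::atomic<uintptr_t>& base, uintptr_t heap_end, size_t size, size_t align)
{
  uintptr_t old_base = base.load();
  uintptr_t next_base;
  do
  {
    uintptr_t aligned = (old_base + align - 1) & ~uintptr_t(align - 1);
    next_base = aligned + size;
    if (next_base > heap_end)
      return nullptr; // out of memory
    // On failure, compare_exchange_strong reloads old_base and we retry.
  } while (!base.compare_exchange_strong(old_base, next_base));
  return reinterpret_cast<void*>(old_base);
}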

32
3rdparty/snmalloc/src/pal/pal_plain.h vendored

@@ -1,32 +0,0 @@
#pragma once
#include "../ds/bits.h"
#include "../mem/allocconfig.h"
namespace snmalloc
{
// Can be extended
// Will require a reserve method in subclasses.
template<class State>
class PALPlainMixin : public State
{
public:
// Notify platform that we will not be using these pages
void notify_not_using(void*, size_t) noexcept {}
// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size) noexcept
{
if constexpr (zero_mem == YesZero)
{
State::zero(p, size);
}
else
{
UNUSED(p);
UNUSED(size);
}
}
};
} // namespace snmalloc

238
3rdparty/snmalloc/src/pal/pal_windows.h vendored

@@ -1,238 +0,0 @@
#pragma once
#include "../ds/bits.h"
#include "../mem/allocconfig.h"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
// VirtualAlloc2 is exposed in RS5 headers.
# ifdef NTDDI_WIN10_RS5
# if (NTDDI_VERSION >= NTDDI_WIN10_RS5) && \
(WINVER >= _WIN32_WINNT_WIN10) && !defined(USE_SYSTEMATIC_TESTING)
# define PLATFORM_HAS_VIRTUALALLOC2
# endif
# endif
namespace snmalloc
{
class PALWindows
{
/**
* The number of times that the memory pressure notification has fired.
*/
static std::atomic<uint64_t> pressure_epoch;
/**
* A flag indicating that we have tried to register for low-memory
* notifications.
*/
static std::atomic<bool> registered_for_notifications;
static HANDLE lowMemoryObject;
/**
* Callback, used when the system delivers a low-memory notification. This
* simply increments an atomic counter each time the notification is raised.
*/
static void CALLBACK low_memory(_In_ PVOID, _In_ BOOLEAN)
{
pressure_epoch++;
}
public:
PALWindows()
{
// No error handling here - if this doesn't work, then we will just
// consume more memory. There's nothing sensible that we could do in
// error handling. We also leak both the low memory notification object
// handle and the wait object handle. We'll need them until the program
// exits, so there's little point doing anything else.
//
// We only try to register once. If this fails, give up. Even if we
// create multiple PAL objects, we don't want to get more than one
// callback.
if (!registered_for_notifications.exchange(true))
{
lowMemoryObject =
CreateMemoryResourceNotification(LowMemoryResourceNotification);
HANDLE waitObject;
RegisterWaitForSingleObject(
&waitObject,
lowMemoryObject,
low_memory,
nullptr,
INFINITE,
WT_EXECUTEDEFAULT);
}
}
/**
* Bitmap of PalFeatures flags indicating the optional features that this
* PAL supports. This PAL supports low-memory notifications.
*/
static constexpr uint64_t pal_features = LowMemoryNotification
# if defined(PLATFORM_HAS_VIRTUALALLOC2) || defined(USE_SYSTEMATIC_TESTING)
| AlignedAllocation
# endif
;
/**
* Counter values for the number of times that a low-pressure notification
* has been delivered. Callers should compare this with a previous value
* to see if the low memory state has been triggered since they last
* checked.
*/
uint64_t low_memory_epoch()
{
return pressure_epoch.load(std::memory_order_acquire);
}
/**
* Check whether the low memory state is still in effect. This is an
* expensive operation and should not be on any fast paths.
*/
bool expensive_low_memory_check()
{
BOOL result;
QueryMemoryResourceNotification(lowMemoryObject, &result);
return result;
}
static void error(const char* const str)
{
puts(str);
fflush(stdout);
abort();
}
/// Notify platform that we will not be using these pages
void notify_not_using(void* p, size_t size) noexcept
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
BOOL ok = VirtualFree(p, size, MEM_DECOMMIT);
if (!ok)
error("VirtualFree failed");
}
/// Notify platform that we will be using these pages
template<ZeroMem zero_mem>
void notify_using(void* p, size_t size) noexcept
{
assert(
bits::is_aligned_block<OS_PAGE_SIZE>(p, size) || (zero_mem == NoZero));
void* r = VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE);
if (r == nullptr)
error("out of memory");
}
/// OS specific function for zeroing memory
template<bool page_aligned = false>
void zero(void* p, size_t size) noexcept
{
if (page_aligned || bits::is_aligned_block<OS_PAGE_SIZE>(p, size))
{
assert(bits::is_aligned_block<OS_PAGE_SIZE>(p, size));
notify_not_using(p, size);
notify_using<YesZero>(p, size);
}
else
::memset(p, 0, size);
}
# ifdef USE_SYSTEMATIC_TESTING
size_t& systematic_bump_ptr()
{
static size_t bump_ptr = (size_t)0x4000'0000'0000;
return bump_ptr;
}
template<bool committed>
void* reserve(size_t* size, size_t align) noexcept
{
DWORD flags = MEM_RESERVE;
if (committed)
flags |= MEM_COMMIT;
size_t retries = 1000;
void* p;
size_t request = *size;
do
{
p = VirtualAlloc(
(void*)systematic_bump_ptr(), request, flags, PAGE_READWRITE);
systematic_bump_ptr() += request;
retries--;
} while (p == nullptr && retries > 0);
uintptr_t aligned_p = bits::align_up((size_t)p, align);
if (aligned_p != (uintptr_t)p)
{
auto extra_bit = aligned_p - (uintptr_t)p;
uintptr_t end = (uintptr_t)p + request;
// Attempt to align end of the block.
VirtualAlloc((void*)end, extra_bit, flags, PAGE_READWRITE);
}
*size = request;
return p;
}
# elif defined(PLATFORM_HAS_VIRTUALALLOC2)
template<bool committed>
void* reserve(size_t* size, size_t align) noexcept
{
DWORD flags = MEM_RESERVE;
if (committed)
flags |= MEM_COMMIT;
// Windows doesn't let you request memory less than 64KB aligned. Most
// operating systems will simply give you something more aligned than you
// ask for, but Windows complains about invalid parameters.
const size_t min_align = 64 * 1024;
if (align < min_align)
align = min_align;
// If we're on Windows 10 or newer, we can use the VirtualAlloc2
// function. The FromApp variant is useable by UWP applications and
// cannot allocate executable memory.
MEM_ADDRESS_REQUIREMENTS addressReqs = {0};
MEM_EXTENDED_PARAMETER param = {0};
addressReqs.Alignment = align;
param.Type = MemExtendedParameterAddressRequirements;
param.Pointer = &addressReqs;
void* ret = VirtualAlloc2FromApp(
nullptr, nullptr, *size, flags, PAGE_READWRITE, &param, 1);
if (ret == nullptr)
{
error("Failed to allocate memory\n");
}
return ret;
}
# else
template<bool committed>
void* reserve(size_t* size) noexcept
{
DWORD flags = MEM_RESERVE;
if (committed)
flags |= MEM_COMMIT;
void* ret = VirtualAlloc(nullptr, *size, flags, PAGE_READWRITE);
if (ret == nullptr)
{
error("Failed to allocate memory\n");
}
return ret;
}
# endif
};
HEADER_GLOBAL std::atomic<uint64_t> PALWindows::pressure_epoch;
HEADER_GLOBAL std::atomic<bool> PALWindows::registered_for_notifications;
HEADER_GLOBAL HANDLE PALWindows::lowMemoryObject;
}
#endif
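
As the comment on low_memory_epoch() says, callers are expected to remember the last epoch they observed and only do the expensive check when it changes. A rough caller-side sketch (illustrative only, assuming this header is included; the function name is invented):

#include <cstdint>

inline void maybe_release_cache(
  snmalloc::PALWindows& pal, uint64_t& last_seen_epoch)
{
  uint64_t current = pal.low_memory_epoch();
  if (current == last_seen_epoch)
    return; // no new notification since the last check
  last_seen_epoch = current;
  if (pal.expensive_low_memory_check())
  {
    // Still under pressure: this is where cached memory would be returned.
  }
}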

3
3rdparty/snmalloc/src/snmalloc.h vendored

@@ -1,3 +0,0 @@
#pragma once
#include "mem/threadalloc.h"

@@ -1,52 +0,0 @@
#define OPEN_ENCLAVE
#define OPEN_ENCLAVE_SIMULATION
#define USE_RESERVE_MULTIPLE 1
#include <iostream>
#include <snmalloc.h>
void* oe_base;
void* oe_end;
extern "C" const void* __oe_get_heap_base()
{
return oe_base;
}
extern "C" const void* __oe_get_heap_end()
{
return oe_end;
}
extern "C" void* oe_memset(void* p, int c, size_t size)
{
return memset(p, c, size);
}
extern "C" void oe_abort()
{
abort();
}
using namespace snmalloc;
int main()
{
MemoryProviderStateMixin<DefaultPal> mp;
size_t size = 1ULL << 28;
oe_base = mp.reserve<true>(&size, 0);
oe_end = (uint8_t*)oe_base + size;
std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;
auto a = ThreadAlloc::get();
for (size_t i = 0; i < 1000; i++)
{
auto r1 = a->alloc(100);
std::cout << "Allocated object " << r1 << std::endl;
if (oe_base > r1)
abort();
if (oe_end < r1)
abort();
}
}

@@ -1,119 +0,0 @@
#include <stdio.h>
#define SNMALLOC_NAME_MANGLE(a) our_##a
#include "../../../override/malloc.cc"
using namespace snmalloc;
void check_result(size_t size, size_t align, void* p, int err, bool null)
{
if (errno != err)
abort();
if (null)
{
if (p != nullptr)
abort();
}
else
{
if (our_malloc_usable_size(p) < size)
abort();
if (((uintptr_t)p % align) != 0)
abort();
our_free(p);
}
}
void test_calloc(size_t nmemb, size_t size, int err, bool null)
{
fprintf(stderr, "calloc(%d, %d)\n", (int)nmemb, (int)size);
errno = 0;
void* p = our_calloc(nmemb, size);
if ((p != nullptr) && (errno == 0))
{
for (size_t i = 0; i < (size * nmemb); i++)
{
if (((uint8_t*)p)[i] != 0)
abort();
}
}
check_result(nmemb * size, 1, p, err, null);
}
void test_realloc(void* p, size_t size, int err, bool null)
{
fprintf(stderr, "realloc(%p(%d), %d)\n", p, int(size), (int)size);
errno = 0;
p = our_realloc(p, size);
check_result(size, 1, p, err, null);
}
void test_posix_memalign(size_t size, size_t align, int err, bool null)
{
fprintf(stderr, "posix_memalign(&p, %d, %d)\n", (int)align, (int)size);
void* p = nullptr;
errno = our_posix_memalign(&p, align, size);
check_result(size, align, p, err, null);
}
void test_memalign(size_t size, size_t align, int err, bool null)
{
fprintf(stderr, "memalign(%d, %d)\n", (int)align, (int)size);
errno = 0;
void* p = our_memalign(align, size);
check_result(size, align, p, err, null);
}
int main(int argc, char** argv)
{
UNUSED(argc);
UNUSED(argv);
constexpr int SUCCESS = 0;
test_calloc(0, 0, SUCCESS, false);
for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
{
const size_t size = sizeclass_to_size(sc);
bool overflow = false;
for (size_t n = 1; bits::umul(size, n, overflow) <= SUPERSLAB_SIZE; n *= 5)
{
if (overflow)
break;
test_calloc(n, size, SUCCESS, false);
test_calloc(n, 0, SUCCESS, false);
}
test_calloc(0, size, SUCCESS, false);
test_realloc(our_malloc(size), size, SUCCESS, false);
test_realloc(our_malloc(size), 0, SUCCESS, true);
test_realloc(nullptr, size, SUCCESS, false);
test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true);
}
test_posix_memalign(0, 0, EINVAL, true);
test_posix_memalign((size_t)-1, 0, EINVAL, true);
for (size_t align = sizeof(size_t); align <= SUPERSLAB_SIZE; align <<= 1)
{
for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
{
const size_t size = sizeclass_to_size(sc);
test_posix_memalign(size, align, SUCCESS, false);
test_posix_memalign(size, 0, EINVAL, true);
test_memalign(size, align, SUCCESS, false);
}
test_posix_memalign(0, align, SUCCESS, false);
test_posix_memalign((size_t)-1, align, ENOMEM, true);
test_posix_memalign(0, align + 1, EINVAL, true);
}
return 0;
}

@@ -1,313 +0,0 @@
#include <snmalloc.h>
#include <test/opt.h>
#include <test/xoroshiro.h>
#include <unordered_set>
using namespace snmalloc;
void test_alloc_dealloc_64k()
{
auto* alloc = ThreadAlloc::get();
constexpr size_t count = 1 << 12;
constexpr size_t outer_count = 12;
void* garbage[count];
void* keep_alive[outer_count];
for (size_t j = 0; j < outer_count; j++)
{
// Allocate 64k of 16-byte allocs
// This will fill the short slab, and then start a new slab.
for (size_t i = 0; i < count; i++)
{
garbage[i] = alloc->alloc(16);
}
// Allocate one object on the second slab
keep_alive[j] = alloc->alloc(16);
for (size_t i = 0; i < count; i++)
{
alloc->dealloc(garbage[i]);
}
}
for (size_t j = 0; j < outer_count; j++)
{
alloc->dealloc(keep_alive[j]);
}
}
void test_random_allocation()
{
auto* alloc = ThreadAlloc::get();
std::unordered_set<void*> allocated;
constexpr size_t count = 10000;
constexpr size_t outer_count = 10;
void* objects[count];
for (size_t i = 0; i < count; i++)
objects[i] = nullptr;
// Randomly allocate and deallocate objects
xoroshiro::p128r32 r;
size_t alloc_count = 0;
for (size_t j = 0; j < outer_count; j++)
{
auto just_dealloc = r.next() % 2 == 1;
auto duration = r.next() % count;
for (size_t i = 0; i < duration; i++)
{
auto index = r.next();
auto& cell = objects[index % count];
if (cell != nullptr)
{
alloc->dealloc(cell);
allocated.erase(cell);
cell = nullptr;
alloc_count--;
}
if (!just_dealloc)
{
cell = alloc->alloc(16);
auto pair = allocated.insert(cell);
// Check not already allocated
assert(pair.second);
UNUSED(pair);
alloc_count++;
}
else
{
if (alloc_count == 0 && just_dealloc)
break;
}
}
}
// Deallocate all the remaining objects
for (size_t i = 0; i < count; i++)
if (objects[i] != nullptr)
alloc->dealloc(objects[i]);
}
void test_calloc()
{
auto* alloc = ThreadAlloc::get();
for (size_t size = 16; size <= (1 << 24); size <<= 1)
{
void* p = alloc->alloc(size);
memset(p, 0xFF, size);
alloc->dealloc(p, size);
p = alloc->alloc<YesZero>(size);
for (size_t i = 0; i < size; i++)
{
if (((char*)p)[i] != 0)
abort();
}
alloc->dealloc(p, size);
}
current_alloc_pool()->debug_check_empty();
}
void test_double_alloc()
{
auto* a1 = current_alloc_pool()->acquire();
auto* a2 = current_alloc_pool()->acquire();
const size_t n = (1 << 16) / 32;
for (size_t k = 0; k < 4; k++)
{
std::unordered_set<void*> set1;
std::unordered_set<void*> set2;
for (size_t i = 0; i < (n * 2); i++)
{
void* p = a1->alloc(20);
assert(set1.find(p) == set1.end());
set1.insert(p);
}
for (size_t i = 0; i < (n * 2); i++)
{
void* p = a2->alloc(20);
assert(set2.find(p) == set2.end());
set2.insert(p);
}
while (!set1.empty())
{
auto it = set1.begin();
a2->dealloc(*it, 20);
set1.erase(it);
}
while (!set2.empty())
{
auto it = set2.begin();
a1->dealloc(*it, 20);
set2.erase(it);
}
}
current_alloc_pool()->release(a1);
current_alloc_pool()->release(a2);
current_alloc_pool()->debug_check_empty();
}
void test_external_pointer()
{
// Malloc does not have an external pointer querying mechanism.
auto* alloc = ThreadAlloc::get();
for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++)
{
size_t size = sizeclass_to_size(sc);
void* p1 = alloc->alloc(size);
for (size_t offset = 0; offset < size; offset += 17)
{
void* p2 = (void*)((size_t)p1 + offset);
void* p3 = Alloc::external_pointer(p2);
void* p4 = Alloc::external_pointer<End>(p2);
UNUSED(p3);
UNUSED(p4);
assert(p1 == p3);
assert((size_t)p4 == (size_t)p1 + size - 1);
}
alloc->dealloc(p1, size);
}
current_alloc_pool()->debug_check_empty();
};
void check_offset(void* base, void* interior)
{
void* calced_base = Alloc::external_pointer((void*)interior);
if (calced_base != (void*)base)
abort();
}
void check_external_pointer_large(size_t* base)
{
size_t size = *base;
char* curr = (char*)base;
for (size_t offset = 0; offset < size; offset += 1 << 24)
{
check_offset(base, (void*)(curr + offset));
check_offset(base, (void*)(curr + offset + (1 << 24) - 1));
}
}
void test_external_pointer_large()
{
xoroshiro::p128r64 r;
auto* alloc = ThreadAlloc::get();
constexpr size_t count_log = snmalloc::bits::is64() ? 5 : 3;
constexpr size_t count = 1 << count_log;
// Pre-allocate all the objects
size_t* objects[count];
for (size_t i = 0; i < count; i++)
{
size_t b = snmalloc::bits::is64() ? 28 : 26;
size_t rand = r.next() & ((1 << b) - 1);
size_t size = (1 << 24) + rand;
// store object
objects[i] = (size_t*)alloc->alloc(size);
// Store the allocator's size for this object
*objects[i] = Alloc::alloc_size(objects[i]);
check_external_pointer_large(objects[i]);
if (i > 0)
check_external_pointer_large(objects[i - 1]);
}
for (size_t i = 0; i < count; i++)
{
check_external_pointer_large(objects[i]);
}
// Deallocate everything
for (size_t i = 0; i < count; i++)
{
alloc->dealloc(objects[i]);
}
}
void test_external_pointer_dealloc_bug()
{
auto* alloc = ThreadAlloc::get();
constexpr size_t count = (SUPERSLAB_SIZE / SLAB_SIZE) * 2;
void* allocs[count];
for (size_t i = 0; i < count; i++)
{
allocs[i] = alloc->alloc(SLAB_SIZE / 2);
}
for (size_t i = 1; i < count; i++)
{
alloc->dealloc(allocs[i]);
}
for (size_t i = 0; i < count; i++)
{
Alloc::external_pointer(allocs[i]);
}
alloc->dealloc(allocs[0]);
}
void test_alloc_16M()
{
auto* alloc = ThreadAlloc::get();
// sizes >= 16M use large_alloc
const size_t size = 16'000'000;
void* p1 = alloc->alloc(size);
assert(Alloc::alloc_size(Alloc::external_pointer(p1)) >= size);
alloc->dealloc(p1);
}
void test_calloc_16M()
{
auto* alloc = ThreadAlloc::get();
// sizes >= 16M use large_alloc
const size_t size = 16'000'000;
void* p1 = alloc->alloc<YesZero>(size);
assert(Alloc::alloc_size(Alloc::external_pointer(p1)) >= size);
alloc->dealloc(p1);
}
int main(int argc, char** argv)
{
#ifdef USE_SYSTEMATIC_TESTING
opt::Opt opt(argc, argv);
size_t seed = opt.is<size_t>("--seed", 0);
Virtual::systematic_bump_ptr() += seed << 17;
#else
UNUSED(argc);
UNUSED(argv);
#endif
test_external_pointer_dealloc_bug();
test_external_pointer_large();
test_alloc_dealloc_64k();
test_random_allocation();
test_calloc();
test_double_alloc();
test_external_pointer();
test_alloc_16M();
test_calloc_16M();
return 0;
}

@@ -1,33 +0,0 @@
#include <snmalloc.h>
using namespace snmalloc;
// Check, for all sizeclasses, that we correctly round every offset within
// a superslab to the correct value, by comparing with the standard
// unoptimised version using division.
// Also check that we correctly determine multiples using the optimised check.
int main(int argc, char** argv)
{
UNUSED(argc);
UNUSED(argv);
for (size_t size_class = 0; size_class < NUM_SIZECLASSES; size_class++)
{
size_t rsize = sizeclass_to_size((uint8_t)size_class);
for (size_t offset = 0; offset < SUPERSLAB_SIZE; offset++)
{
size_t rounded = (offset / rsize) * rsize;
bool mod_0 = (offset % rsize) == 0;
size_t opt_rounded = round_by_sizeclass(rsize, offset);
if (rounded != opt_rounded)
abort();
bool opt_mod_0 = is_multiple_of_sizeclass(rsize, offset);
if (opt_mod_0 != mod_0)
abort();
}
}
return 0;
}
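
As a concrete instance of what the inner loop verifies: taking, say, rsize = 48 and offset = 100, the reference computation gives (100 / 48) * 48 = 96 and 100 % 48 = 4, so round_by_sizeclass(48, 100) must return 96 and is_multiple_of_sizeclass(48, 100) must return false.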

@@ -1,53 +0,0 @@
#include <iostream>
#include <snmalloc.h>
NOINLINE
uint8_t size_to_sizeclass(size_t size)
{
return snmalloc::size_to_sizeclass(size);
}
int main(int, char**)
{
bool failed = false;
size_t size_low = 0;
std::cout << "0 has sizeclass: " << (size_t)snmalloc::size_to_sizeclass(0)
<< std::endl;
std::cout << "sizeclass |-> [size_low, size_high] " << std::endl;
for (uint8_t sz = 0; sz < snmalloc::NUM_SIZECLASSES; sz++)
{
// Separate printing for small and medium sizeclasses
if (sz == snmalloc::NUM_SMALL_CLASSES)
std::cout << std::endl;
size_t size = snmalloc::sizeclass_to_size(sz);
std::cout << (size_t)sz << " |-> "
<< "[" << size_low + 1 << ", " << size << "]" << std::endl;
if (size < size_low)
{
std::cout << "Sizeclass " << (size_t)sz << " is " << size
<< " which is less than " << size_low << std::endl;
failed = true;
}
for (size_t i = size_low + 1; i <= size; i++)
{
if (size_to_sizeclass(i) != sz)
{
std::cout << "Size " << i << " has sizeclass "
<< (size_t)size_to_sizeclass(i) << " but expected sizeclass "
<< (size_t)sz << std::endl;
failed = true;
}
}
size_low = size;
}
if (failed)
abort();
}

@@ -1,11 +0,0 @@
#undef IS_ADDRESS_SPACE_CONSTRAINED
#define OPEN_ENCLAVE
#define OPEN_ENCLAVE_SIMULATION
#define USE_RESERVE_MULTIPLE 1
#define NO_BOOTSTRAP_ALLOCATOR
#define IS_ADDRESS_SPACE_CONSTRAINED
#define SNMALLOC_EXPOSE_PAGEMAP
#define SNMALLOC_NAME_MANGLE(a) enclave_##a
// Redefine the namespace, so we can have two versions.
#define snmalloc snmalloc_enclave
#include "../../../override/malloc.cc"

@@ -1,7 +0,0 @@
#undef IS_ADDRESS_SPACE_CONSTRAINED
#define SNMALLOC_NAME_MANGLE(a) host_##a
#define NO_BOOTSTRAP_ALLOCATOR
#define SNMALLOC_EXPOSE_PAGEMAP
// Redefine the namespace, so we can have two versions.
#define snmalloc snmalloc_host
#include "../../../override/malloc.cc"

@@ -1,63 +0,0 @@
#include "../../../snmalloc.h"
#include <iostream>
#include <stdlib.h>
#include <string.h>
void* oe_base;
void* oe_end;
extern "C" const void* __oe_get_heap_base()
{
return oe_base;
}
extern "C" const void* __oe_get_heap_end()
{
return oe_end;
}
extern "C" void* oe_memset(void* p, int c, size_t size)
{
return memset(p, c, size);
}
extern "C" void oe_abort()
{
abort();
}
extern "C" void* host_malloc(size_t);
extern "C" void host_free(void*);
extern "C" void* enclave_malloc(size_t);
extern "C" void enclave_free(void*);
extern "C" void*
enclave_snmalloc_pagemap_global_get(snmalloc::PagemapConfig const**);
extern "C" void*
host_snmalloc_pagemap_global_get(snmalloc::PagemapConfig const**);
using namespace snmalloc;
int main()
{
MemoryProviderStateMixin<DefaultPal> mp;
size_t size = 1ULL << 28;
oe_base = mp.reserve<true>(&size, 1);
oe_end = (uint8_t*)oe_base + size;
std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;
// Call these functions to trigger asserts if the cast-to-self doesn't work.
const PagemapConfig* c;
enclave_snmalloc_pagemap_global_get(&c);
host_snmalloc_pagemap_global_get(&c);
auto a = host_malloc(128);
auto b = enclave_malloc(128);
std::cout << "Host alloc " << a << std::endl;
std::cout << "Enclave alloc " << b << std::endl;
host_free(a);
enclave_free(b);
}

249
3rdparty/snmalloc/src/test/histogram.h vendored

@@ -1,249 +0,0 @@
#pragma once
#ifdef USE_MEASURE
# include "../ds/flaglock.h"
# include <algorithm>
# include <iomanip>
# include <iostream>
# define MEASURE_TIME_MARKERS(id, minbits, maxbits, markers) \
static constexpr const char* const id##_time_markers[] = markers; \
static histogram::Global<histogram::Histogram<uint64_t, minbits, maxbits>> \
id##_time_global(#id, __FILE__, __LINE__, id##_time_markers); \
static thread_local histogram::Histogram<uint64_t, minbits, maxbits> \
id##_time_local(id##_time_global); \
histogram::MeasureTime<histogram::Histogram<uint64_t, minbits, maxbits>> \
id##_time(id##_time_local);
# define MEASURE_TIME(id, minbits, maxbits) \
MEASURE_TIME_MARKERS(id, minbits, maxbits, {nullptr})
# define MARKERS(...) \
{ \
__VA_ARGS__, nullptr \
}
namespace histogram
{
using namespace snmalloc;
template<class H>
class Global;
template<
class V,
size_t LOW_BITS,
size_t HIGH_BITS,
size_t INTERMEDIATE_BITS = LOW_BITS>
class Histogram
{
public:
using This = Histogram<V, LOW_BITS, HIGH_BITS, INTERMEDIATE_BITS>;
friend Global<This>;
static_assert(LOW_BITS < HIGH_BITS, "LOW_BITS must be less than HIGH_BITS");
static constexpr V LOW = (V)((size_t)1 << LOW_BITS);
static constexpr V HIGH = (V)((size_t)1 << HIGH_BITS);
static constexpr size_t BUCKETS =
((HIGH_BITS - LOW_BITS) << INTERMEDIATE_BITS) + 2;
private:
V high = (std::numeric_limits<V>::min)();
size_t overflow;
size_t count[BUCKETS];
Global<This>* global;
public:
Histogram() : global(nullptr) {}
Histogram(Global<This>& g) : global(&g) {}
~Histogram()
{
if (global != nullptr)
global->add(*this);
}
void record(V value)
{
if (value > high)
high = value;
if (value >= HIGH)
{
overflow++;
}
else
{
auto i = get_index(value);
assert(i < BUCKETS);
count[i]++;
}
}
V get_high()
{
return high;
}
size_t get_overflow()
{
return overflow;
}
size_t get_buckets()
{
return BUCKETS;
}
size_t get_count(size_t index)
{
if (index >= BUCKETS)
return 0;
return count[index];
}
static std::pair<V, V> get_range(size_t index)
{
if (index >= BUCKETS)
return std::make_pair(HIGH, HIGH);
if (index == 0)
return std::make_pair(0, get_value(index));
return std::make_pair(get_value(index - 1) + 1, get_value(index));
}
void add(This& that)
{
high = (std::max)(high, that.high);
overflow += that.overflow;
for (size_t i = 0; i < BUCKETS; i++)
count[i] += that.count[i];
}
void print(std::ostream& o)
{
o << "\tHigh: " << high << std::endl
<< "\tOverflow: " << overflow << std::endl;
size_t grand_total = overflow;
for (size_t i = 0; i < BUCKETS; i++)
grand_total += count[i];
size_t old_percentage = 0;
size_t cumulative_total = 0;
for (size_t i = 0; i < BUCKETS; i++)
{
auto r = get_range(i);
cumulative_total += count[i];
o << "\t" << std::setfill(' ') << std::setw(6) << std::get<0>(r) << ".."
<< std::setfill(' ') << std::setw(6) << std::get<1>(r) << ": "
<< std::setfill(' ') << std::setw(10) << count[i];
auto percentage = (cumulative_total * 100 / grand_total);
if (percentage != old_percentage)
{
old_percentage = percentage;
o << std::setfill(' ') << std::setw(20)
<< (cumulative_total * 100 / grand_total) << "%";
}
o << std::endl;
}
}
static size_t get_index(V value)
{
return bits::to_exp_mant<INTERMEDIATE_BITS, LOW_BITS - INTERMEDIATE_BITS>(
value);
}
static V get_value(size_t index)
{
return bits::
from_exp_mant<INTERMEDIATE_BITS, LOW_BITS - INTERMEDIATE_BITS>(index);
}
};
template<class H>
class Global
{
private:
const char* name;
const char* file;
size_t line;
const char* const* markers;
std::atomic_flag lock = ATOMIC_FLAG_INIT;
H aggregate;
public:
Global(
const char* name_,
const char* file_,
size_t line_,
const char* const* markers)
: name(name_), file(file_), line(line_), markers(markers)
{}
~Global()
{
print();
}
void add(H& histogram)
{
FlagLock f(lock);
aggregate.add(histogram);
}
private:
void print()
{
std::cout << name;
if (markers != nullptr)
{
std::cout << ": ";
size_t i = 0;
while (markers[i] != nullptr)
std::cout << markers[i++] << " ";
}
std::cout << std::endl << file << ":" << line << std::endl;
aggregate.print(std::cout);
}
};
template<class H>
class MeasureTime
{
private:
H& histogram;
uint64_t t;
public:
MeasureTime(H& histogram_) : histogram(histogram_)
{
t = bits::benchmark_time_start();
}
~MeasureTime()
{
histogram.record(bits::benchmark_time_end() - t);
}
};
}
#else
# define MEASURE_TIME(id, minbits, maxbits)
# define MEASURE_TIME_MARKERS(id, minbits, maxbits, markers)
#endif
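
With USE_MEASURE defined, the macros above plant a scoped timer whose samples land in a thread-local histogram and are merged into the global aggregate, which is printed when the process exits. A hedged usage sketch (the function name is invented for illustration):

void timed_operation()
{
  // Buckets span roughly 2^4 to 2^20 timer ticks for this call site.
  MEASURE_TIME(timed_operation, 4, 20);
  volatile size_t sum = 0;
  for (size_t i = 0; i < 1000; i++)
    sum = sum + i;
}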

14
3rdparty/snmalloc/src/test/measuretime.h vendored

@@ -1,14 +0,0 @@
#pragma once
#include <chrono>
#include <iomanip>
#include <iostream>
#define DO_TIME(name, code) \
{ \
auto start__ = std::chrono::high_resolution_clock::now(); \
code auto finish__ = std::chrono::high_resolution_clock::now(); \
auto diff__ = finish__ - start__; \
std::cout << name << ": " << std::setw(12) << diff__.count() << " ns" \
<< std::endl; \
}

91
3rdparty/snmalloc/src/test/opt.h vendored

@@ -1,91 +0,0 @@
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <type_traits>
namespace opt
{
class Opt
{
private:
int argc;
char** argv;
public:
Opt(int argc, char** argv) : argc(argc), argv(argv) {}
bool has(const char* opt)
{
for (int i = 1; i < argc; i++)
{
if (!strcmp(opt, argv[i]))
return true;
}
return false;
}
template<class T>
T is(const char* opt, T def)
{
size_t len = strlen(opt);
for (int i = 1; i < argc; i++)
{
const char* p = param(opt, len, i);
if (p != nullptr)
{
char* end = nullptr;
T r;
if (std::is_unsigned<T>::value)
r = (T)strtoull(p, &end, 10);
else
r = (T)strtoll(p, &end, 10);
if ((r == 0) && (end == p))
return def;
return r;
}
}
return def;
}
const char* is(const char* opt, const char* def)
{
size_t len = strlen(opt);
for (int i = 1; i < argc; i++)
{
const char* p = param(opt, len, i);
if (p != nullptr)
return p;
}
return def;
}
private:
const char* param(const char* opt, size_t len, int i)
{
if (strncmp(opt, argv[i], len))
return nullptr;
switch (argv[i][len])
{
case '\0':
return (i < (argc - 1)) ? argv[i + 1] : nullptr;
case '=':
return &argv[i][len + 1];
default:
return nullptr;
}
}
};
}
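
Opt accepts both the `--name value` and `--name=value` spellings (see param above). A small hedged usage sketch (the include path is an assumption based on how the tests include this header):

#include <cstddef>
#include <cstdio>
#include <test/opt.h>

int main(int argc, char** argv)
{
  opt::Opt options(argc, argv);
  size_t cores = options.is<size_t>("--cores", 8); // defaults to 8
  bool stats = options.has("--stats");
  printf("cores=%zu stats=%d\n", cores, (int)stats);
  return 0;
}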

@@ -1,176 +0,0 @@
#include "test/measuretime.h"
#include "test/opt.h"
#include "test/usage.h"
#include "test/xoroshiro.h"
#include <iomanip>
#include <iostream>
#include <snmalloc.h>
#include <thread>
#include <vector>
using namespace snmalloc;
bool use_malloc = false;
template<void f(size_t id)>
class ParallelTest
{
private:
std::atomic<bool> flag = false;
std::atomic<size_t> ready = 0;
uint64_t start;
uint64_t end;
std::atomic<size_t> complete = 0;
size_t cores;
void run(size_t id)
{
auto prev = ready.fetch_add(1);
if (prev + 1 == cores)
{
start = bits::tick();
flag = true;
}
while (!flag)
bits::pause();
f(id);
prev = complete.fetch_add(1);
if (prev + 1 == cores)
{
end = bits::tick();
}
}
public:
ParallelTest(size_t cores) : cores(cores)
{
std::thread* t = new std::thread[cores];
for (size_t i = 0; i < cores; i++)
{
t[i] = std::thread(&ParallelTest::run, this, i);
}
// Wait for all the threads.
for (size_t i = 0; i < cores; i++)
{
t[i].join();
}
delete[] t;
}
uint64_t time()
{
return end - start;
}
};
std::atomic<size_t*>* contention;
size_t swapsize;
size_t swapcount;
void test_tasks_f(size_t id)
{
Alloc* a = ThreadAlloc::get();
xoroshiro::p128r32 r(id + 5000);
for (size_t n = 0; n < swapcount; n++)
{
size_t size = 16 + (r.next() % 1024);
size_t* res = (size_t*)(use_malloc ? malloc(size) : a->alloc(size));
*res = size;
size_t* out =
contention[n % swapsize].exchange(res, std::memory_order_relaxed);
if (out != nullptr)
{
size = *out;
if (use_malloc)
free(out);
else
a->dealloc(out, size);
}
}
};
void test_tasks(size_t num_tasks, size_t count, size_t size)
{
Alloc* a = ThreadAlloc::get();
contention = new std::atomic<size_t*>[size];
xoroshiro::p128r32 r;
for (size_t n = 0; n < size; n++)
{
size_t alloc_size = 16 + (r.next() % 1024);
size_t* res =
(size_t*)(use_malloc ? malloc(alloc_size) : a->alloc(alloc_size));
*res = alloc_size;
contention[n] = res;
}
swapcount = count;
swapsize = size;
#ifdef USE_SNMALLOC_STATS
Stats s0;
current_alloc_pool()->aggregate_stats(s0);
#endif
{
ParallelTest<test_tasks_f> test(num_tasks);
std::cout << "Task test, " << num_tasks << " threads, " << count
<< " swaps per thread " << test.time() << "ticks" << std::endl;
for (size_t n = 0; n < swapsize; n++)
{
if (contention[n] != nullptr)
{
if (use_malloc)
free(contention[n]);
else
a->dealloc(contention[n], *contention[n]);
}
}
delete[] contention;
}
#ifndef NDEBUG
current_alloc_pool()->debug_check_empty();
#endif
};
int main(int argc, char** argv)
{
opt::Opt opt(argc, argv);
size_t cores = opt.is<size_t>("--cores", 8);
size_t count = opt.is<size_t>("--swapcount", 1 << 20);
size_t size = opt.is<size_t>("--swapsize", 1 << 18);
use_malloc = opt.has("--use_malloc");
std::cout << "Allocator is " << (use_malloc ? "System" : "snmalloc")
<< std::endl;
for (size_t i = cores; i > 0; i >>= 1)
test_tasks(i, count, size);
if (opt.has("--stats"))
{
#ifdef USE_SNMALLOC_STATS
Stats s;
current_alloc_pool()->aggregate_stats(s);
s.print<Alloc>(std::cout);
#endif
usage::print_memory();
}
return 0;
}

@@ -1,81 +0,0 @@
#include <snmalloc.h>
#include <test/measuretime.h>
#include <test/xoroshiro.h>
#include <unordered_set>
using namespace snmalloc;
namespace test
{
static constexpr size_t count_log = 20;
static constexpr size_t count = 1 << count_log;
// Pre-allocate all the objects
size_t* objects[count];
NOINLINE void setup(xoroshiro::p128r64& r, Alloc* alloc)
{
for (size_t i = 0; i < count; i++)
{
size_t rand = (size_t)r.next();
size_t offset = bits::clz(rand);
if (offset > 30)
offset = 30;
size_t size = (rand & 15) << offset;
if (size < 16)
size = 16;
// store object
objects[i] = (size_t*)alloc->alloc(size);
// Store the allocator's size for this object
*objects[i] = Alloc::alloc_size(objects[i]);
}
}
NOINLINE void teardown(Alloc* alloc)
{
// Deallocate everything
for (size_t i = 0; i < count; i++)
{
alloc->dealloc(objects[i]);
}
current_alloc_pool()->debug_check_empty();
}
void test_external_pointer(xoroshiro::p128r64& r)
{
auto* alloc = ThreadAlloc::get();
setup(r, alloc);
DO_TIME("External pointer queries ", {
for (size_t i = 0; i < 10000000; i++)
{
size_t rand = (size_t)r.next();
size_t oid = rand & (((size_t)1 << count_log) - 1);
size_t* external_ptr = objects[oid];
size_t size = *external_ptr;
size_t offset = (size >> 4) * (rand & 15);
size_t interior_ptr = ((size_t)external_ptr) + offset;
void* calced_external = Alloc::external_pointer((void*)interior_ptr);
if (calced_external != external_ptr)
abort();
}
});
teardown(alloc);
}
}
int main(int, char**)
{
xoroshiro::p128r64 r;
#if NDEBUG
size_t nn = 30;
#else
size_t nn = 3;
#endif
for (size_t n = 0; n < nn; n++)
test::test_external_pointer(r);
return 0;
}

@@ -1,83 +0,0 @@
#include <snmalloc.h>
#include <test/measuretime.h>
#include <unordered_set>
using namespace snmalloc;
template<ZeroMem zero_mem>
void test_alloc_dealloc(size_t count, size_t size, bool write)
{
auto* alloc = ThreadAlloc::get();
DO_TIME(
"Count: " << std::setw(6) << count << ", Size: " << std::setw(6) << size
<< ", ZeroMem: " << (zero_mem == YesZero) << ", Write: " << write,
{
std::unordered_set<void*> set;
// alloc 1.5x objects
for (size_t i = 0; i < ((count * 3) / 2); i++)
{
void* p = alloc->alloc<zero_mem>(size);
assert(set.find(p) == set.end());
if (write)
*(int*)p = 4;
set.insert(p);
}
// free 0.25x of the objects
for (size_t i = 0; i < (count / 4); i++)
{
auto it = set.begin();
void* p = *it;
alloc->dealloc(p, size);
set.erase(it);
assert(set.find(p) == set.end());
}
// alloc 1x objects
for (size_t i = 0; i < count; i++)
{
void* p = alloc->alloc<zero_mem>(size);
assert(set.find(p) == set.end());
if (write)
*(int*)p = 4;
set.insert(p);
}
// free everything
while (!set.empty())
{
auto it = set.begin();
alloc->dealloc(*it, size);
set.erase(it);
}
});
current_alloc_pool()->debug_check_empty();
}
int main(int, char**)
{
for (size_t size = 16; size <= 128; size <<= 1)
{
test_alloc_dealloc<NoZero>(1 << 15, size, false);
test_alloc_dealloc<NoZero>(1 << 15, size, true);
test_alloc_dealloc<YesZero>(1 << 15, size, false);
test_alloc_dealloc<YesZero>(1 << 15, size, true);
}
for (size_t size = 1 << 12; size <= 1 << 17; size <<= 1)
{
test_alloc_dealloc<NoZero>(1 << 10, size, false);
test_alloc_dealloc<NoZero>(1 << 10, size, true);
test_alloc_dealloc<YesZero>(1 << 10, size, false);
test_alloc_dealloc<YesZero>(1 << 10, size, true);
}
return 0;
}

42
3rdparty/snmalloc/src/test/usage.h vendored

@@ -1,42 +0,0 @@
#pragma once
#if defined(_WIN32)
# define WIN32_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
// Needs to be included after windows.h
# include <psapi.h>
#endif
#include <iomanip>
#include <iostream>
namespace usage
{
void print_memory()
{
#if defined(_WIN32)
PROCESS_MEMORY_COUNTERS_EX pmc;
if (!GetProcessMemoryInfo(
GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
return;
std::cout << "Memory info:" << std::endl
<< "\tPageFaultCount: " << pmc.PageFaultCount << std::endl
<< "\tPeakWorkingSetSize: " << pmc.PeakWorkingSetSize << std::endl
<< "\tWorkingSetSize: " << pmc.WorkingSetSize << std::endl
<< "\tQuotaPeakPagedPoolUsage: " << pmc.QuotaPeakPagedPoolUsage
<< std::endl
<< "\tQuotaPagedPoolUsage: " << pmc.QuotaPagedPoolUsage
<< std::endl
<< "\tQuotaPeakNonPagedPoolUsage: "
<< pmc.QuotaPeakNonPagedPoolUsage << std::endl
<< "\tQuotaNonPagedPoolUsage: " << pmc.QuotaNonPagedPoolUsage
<< std::endl
<< "\tPagefileUsage: " << pmc.PagefileUsage << std::endl
<< "\tPeakPagefileUsage: " << pmc.PeakPagefileUsage << std::endl
<< "\tPrivateUsage: " << pmc.PrivateUsage << std::endl;
#endif
}
};

71
3rdparty/snmalloc/src/test/xoroshiro.h vendored

@@ -1,71 +0,0 @@
#pragma once
#include <cstdint>
#include <cstdlib>
namespace xoroshiro
{
namespace detail
{
template<typename STATE, typename RESULT, STATE A, STATE B, STATE C>
class XorOshiro
{
private:
static constexpr unsigned STATE_BITS = 8 * sizeof(STATE);
static constexpr unsigned RESULT_BITS = 8 * sizeof(RESULT);
static_assert(
STATE_BITS >= RESULT_BITS,
"STATE must have at least as many bits as RESULT");
STATE x;
STATE y;
static inline STATE rotl(STATE x, STATE k)
{
return (x << k) | (x >> (STATE_BITS - k));
}
public:
XorOshiro(STATE x_ = 5489, STATE y_ = 0) : x(x_), y(y_)
{
// If both zero, then this does not work
if (x_ == 0 && y_ == 0)
abort();
next();
}
void set_state(STATE x_, STATE y_ = 0)
{
// If both zero, then this does not work
if (x_ == 0 && y_ == 0)
abort();
x = x_;
y = y_;
next();
}
RESULT next()
{
STATE r = x + y;
y ^= x;
x = rotl(x, A) ^ y ^ (y << B);
y = rotl(y, C);
// If both zero, then this does not work
if (x == 0 && y == 0)
abort();
return r >> (STATE_BITS - RESULT_BITS);
}
};
}
using p128r64 = detail::XorOshiro<uint64_t, uint64_t, 55, 14, 36>;
using p128r32 = detail::XorOshiro<uint64_t, uint32_t, 55, 14, 36>;
using p64r32 = detail::XorOshiro<uint32_t, uint32_t, 27, 7, 20>;
using p64r16 = detail::XorOshiro<uint32_t, uint16_t, 27, 7, 20>;
using p32r16 = detail::XorOshiro<uint16_t, uint16_t, 13, 5, 10>;
using p32r8 = detail::XorOshiro<uint16_t, uint8_t, 13, 5, 10>;
using p16r8 = detail::XorOshiro<uint8_t, uint8_t, 4, 7, 3>;
}

@@ -90,15 +90,6 @@
}
}
},
{
"component": {
"type": "git",
"git": {
"repositoryUrl": "https://github.com/microsoft/snmalloc",
"commitHash": "9ca436c951d635bface96a0b9d73262ab4cb088e"
}
}
},
{
"Component": {
"Type": "other",

@@ -85,19 +85,14 @@ if("virtual" IN_LIST TARGET)
set_property(TARGET libbyz.host PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(libbyz.host PRIVATE SYSTEM ${EVERCRYPT_INC})
set(SNMALLOC_ONLY_HEADER_LIBRARY ON)
add_subdirectory(${CMAKE_SOURCE_DIR}/3rdparty/snmalloc EXCLUDE_FROM_ALL)
add_library(libcommontest STATIC
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/network_udp.cpp
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/network_udp_mt.cpp
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/ITimer.cpp
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/Time.cpp
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/Statistics.cpp
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/test/snmalloc.cpp
)
target_compile_options(libcommontest PRIVATE -stdlib=libc++)
target_link_libraries(libcommontest PRIVATE snmalloc_lib)
target_include_directories(libcommontest PRIVATE
${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz

@@ -1,5 +0,0 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#define NO_BOOTSTRAP_ALLOCATOR
#include "snmalloc/src/override/malloc.cc"