diff --git a/3rdparty/snmalloc/CMakeLists.txt b/3rdparty/snmalloc/CMakeLists.txt index 13887a6ae7..611d7302e8 100644 --- a/3rdparty/snmalloc/CMakeLists.txt +++ b/3rdparty/snmalloc/CMakeLists.txt @@ -1,7 +1,13 @@ cmake_minimum_required(VERSION 3.8) project(snmalloc C CXX) +if (NOT CMAKE_BUILD_TYPE) + message(STATUS "No build type selected, default to: Release") + set(CMAKE_BUILD_TYPE "Release") +endif() + include(CheckCXXCompilerFlag) +include(CheckCSourceCompiles) option(USE_SNMALLOC_STATS "Track allocation stats" OFF) option(SNMALLOC_CI_BUILD "Disable features not sensible for CI" OFF) @@ -9,8 +15,17 @@ option(USE_MEASURE "Measure performance with histograms" OFF) option(EXPOSE_EXTERNAL_PAGEMAP "Expose the global pagemap" OFF) option(EXPOSE_EXTERNAL_RESERVE "Expose an interface to reserve memory using the default memory provider" OFF) option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF) +option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON) option(SNMALLOC_QEMU_WORKAROUND "Disable using madvise(DONT_NEED) to zero memory on Linux" Off) +option(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE "Compile for current machine architecture" Off) set(CACHE_FRIENDLY_OFFSET OFF CACHE STRING "Base offset to place linked-list nodes.") +set(SNMALLOC_STATIC_LIBRARY_PREFIX "sn_" CACHE STRING "Static library function prefix") + +CHECK_C_SOURCE_COMPILES(" +#include +size_t malloc_usable_size(const void* ptr) { return 0; } +int main() { return 0; } +" CONST_QUALIFIED_MALLOC_USABLE_SIZE) if ((CMAKE_BUILD_TYPE STREQUAL "Release") AND (NOT SNMALLOC_CI_BUILD)) option(USE_POSIX_COMMIT_CHECKS "Instrument Posix PAL to check for access to unused blocks of memory." Off) @@ -36,6 +51,10 @@ macro(warnings_high) endif() endmacro() +macro(oe_simulate target) + target_compile_definitions(${target} PRIVATE SNMALLOC_USE_SMALL_CHUNKS) +endmacro() + macro(clangformat_targets) # The clang-format tool is installed under a variety of different names. Try # to find a sensible one. 
Only look for versions 9 explicitly - we don't @@ -43,8 +62,7 @@ macro(clangformat_targets) # tool. It does not work with older versions as AfterCaseLabel is not supported # in earlier versions. find_program(CLANG_FORMAT NAMES - clang-format-9 - clang-format) + clang-format-9) # If we've found a clang-format tool, generate a target for it, otherwise emit # a warning. @@ -135,6 +153,10 @@ if(USE_POSIX_COMMIT_CHECKS) target_compile_definitions(snmalloc_lib INTERFACE -DUSE_POSIX_COMMIT_CHECKS) endif() +if(CONST_QUALIFIED_MALLOC_USABLE_SIZE) + target_compile_definitions(snmalloc_lib INTERFACE -DMALLOC_USABLE_SIZE_QUALIFIER=const) +endif() + # To build with just the header library target define SNMALLOC_ONLY_HEADER_LIBRARY # in containing Cmake file. @@ -146,20 +168,32 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG") else() - add_compile_options(-fno-exceptions -fno-rtti -g -ftls-model=initial-exec -fomit-frame-pointer) + add_compile_options(-fno-exceptions -fno-rtti -g -fomit-frame-pointer) + # Static TLS model unsupported on Haiku + if (NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") + add_compile_options(-ftls-model=initial-exec) + endif() if(SNMALLOC_CI_BUILD OR (${CMAKE_BUILD_TYPE} MATCHES "Debug")) # Get better stack traces in CI and Debug. 
target_link_libraries(snmalloc_lib INTERFACE "-rdynamic") endif() - if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR - (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") OR - (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")) + if(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE) check_cxx_compiler_flag(-march=native SUPPORT_MARCH_NATIVE) if (SUPPORT_MARCH_NATIVE) add_compile_options(-march=native) + else() + message(WARNING "Compiler does not support `-march=native` required by SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE") endif() endif() + + find_package(Backtrace) + if(${Backtrace_FOUND}) + target_compile_definitions(snmalloc_lib INTERFACE -DBACKTRACE_HEADER="${Backtrace_HEADER}") + target_link_libraries(snmalloc_lib INTERFACE ${Backtrace_LIBRARIES}) + target_include_directories(snmalloc_lib INTERFACE ${Backtrace_INCLUDE_DIRS}) + endif() + endif() macro(subdirlist result curdir) @@ -204,17 +238,34 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) endmacro() + if (SNMALLOC_STATIC_LIBRARY) + add_shim(snmallocshim-static STATIC src/override/malloc.cc) + add_shim(snmallocshim-1mib-static STATIC src/override/malloc.cc) + add_shim(snmallocshim-16mib-static STATIC src/override/malloc.cc) + target_compile_definitions(snmallocshim-16mib-static PRIVATE SNMALLOC_USE_LARGE_CHUNKS + SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) + target_compile_definitions(snmallocshim-static PRIVATE + SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) + target_compile_definitions(snmallocshim-1mib-static PRIVATE + SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) + endif () + if(NOT WIN32) set(SHARED_FILES src/override/new.cc src/override/malloc.cc) add_shim(snmallocshim SHARED ${SHARED_FILES}) add_shim(snmallocshim-1mib SHARED ${SHARED_FILES}) - target_compile_definitions(snmallocshim-1mib PRIVATE IS_ADDRESS_SPACE_CONSTRAINED) + add_shim(snmallocshim-16mib SHARED ${SHARED_FILES}) + target_compile_definitions(snmallocshim-16mib PRIVATE SNMALLOC_USE_LARGE_CHUNKS) + # Build a 
shim with some settings from oe. + add_shim(snmallocshim-oe SHARED ${SHARED_FILES}) + oe_simulate(snmallocshim-oe) endif() if(SNMALLOC_RUST_SUPPORT) add_shim(snmallocshim-rust STATIC src/override/rust.cc) add_shim(snmallocshim-1mib-rust STATIC src/override/rust.cc) - target_compile_definitions(snmallocshim-1mib-rust PRIVATE IS_ADDRESS_SPACE_CONSTRAINED) + add_shim(snmallocshim-16mib-rust STATIC src/override/rust.cc) + target_compile_definitions(snmallocshim-16mib-rust PRIVATE SNMALLOC_USE_LARGE_CHUNKS) endif() enable_testing() @@ -225,14 +276,17 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) foreach(TEST_CATEGORY ${TEST_CATEGORIES}) subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY}) foreach(TEST ${TESTS}) - foreach(SUPER_SLAB_SIZE 1;16) + foreach(SUPER_SLAB_SIZE 1;16;oe) unset(SRC) aux_source_directory(${TESTDIR}/${TEST_CATEGORY}/${TEST} SRC) set(TESTNAME "${TEST_CATEGORY}-${TEST}-${SUPER_SLAB_SIZE}") add_executable(${TESTNAME} ${SRC}) - if (${SUPER_SLAB_SIZE} EQUAL 1) - target_compile_definitions(${TESTNAME} PRIVATE IS_ADDRESS_SPACE_CONSTRAINED) + if (${SUPER_SLAB_SIZE} EQUAL 16) + target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_USE_LARGE_CHUNKS) + endif() + if (${SUPER_SLAB_SIZE} EQUAL oe) + oe_simulate(${TESTNAME}) endif() target_link_libraries(${TESTNAME} snmalloc_lib) if (${TEST} MATCHES "release-.*") diff --git a/3rdparty/snmalloc/README.md b/3rdparty/snmalloc/README.md index 2b126c9e11..5f69911f2f 100644 --- a/3rdparty/snmalloc/README.md +++ b/3rdparty/snmalloc/README.md @@ -105,6 +105,16 @@ your toolchain: LD_PRELOAD=/usr/local/lib/libsnmallocshim.so ninja ``` +## Cross Compile for Android +Android support is out-of-the-box. + +To cross-compile the library for arm android, you can simply invoke CMake with the toolchain file and the android api settings (for more information, check this [document](https://developer.android.com/ndk/guides/cmake)). 
+ +For example, you can cross-compile for `arm64-v8a` with the following command: +``` +cmake /path/to/snmalloc -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a +``` + # CMake Feature Flags These can be added to your cmake command line. @@ -150,7 +160,7 @@ your system. The PAL must implement the following methods: ```c++ -void error(const char* const str) noexcept; +[[noreturn]] void error(const char* const str) noexcept; ``` Report a fatal error and exit. @@ -184,16 +194,24 @@ pages, rather than zeroing them synchronously in this call ```c++ template -void* reserve(size_t size, size_t align); -template -void* reserve(size_t size) noexcept; +void* reserve_aligned(size_t size) noexcept; +std::pair reserve_at_least(size_t size) noexcept; ``` Only one of these needs to be implemented, depending on whether the underlying system can provide strongly aligned memory regions. -If the system guarantees only page alignment, implement the second and snmalloc -will over-allocate and then trim the requested region. +If the system guarantees only page alignment, implement the second. The Pal is +free to overallocate based on the platforms desire and snmalloc +will find suitably aligned blocks inside the region. `reserve_at_least` should +not commit memory as snmalloc will commit the range of memory it requires of what +is returned. + If the system provides strong alignment, implement the first to return memory -at the desired alignment. +at the desired alignment. If providing the first, then the `Pal` should also +specify the minimum size block it can provide: +``` +static constexpr size_t minimum_alloc_size = ...; +``` + Finally, you need to define a field to indicate the features that your PAL supports: ```c++ @@ -225,6 +243,16 @@ The [Windows](src/pal/pal_windows.h), and [FreeBSD kernel](src/pal/pal_freebsd_kernel.h) implementations give examples of non-POSIX environments that snmalloc supports. 
+The POSIX PAL uses `mmap` to map memory. +Some POSIX or POSIX-like systems require minor tweaks to this behaviour. +Rather than requiring these to copy and paste the code, a PAL that inherits from the POSIX PAL can define one or both of these (`static constexpr`) fields to customise the `mmap` behaviour. + + - `default_mmap_flags` allows a PAL to provide additional `MAP_*` + flags to all `mmap` calls. + - `anonymous_memory_fd` allows the PAL to override the default file + descriptor used for memory mappings. + + # Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a diff --git a/3rdparty/snmalloc/azure-pipelines.yml b/3rdparty/snmalloc/azure-pipelines.yml index f65fe87b1f..03bdfd354d 100644 --- a/3rdparty/snmalloc/azure-pipelines.yml +++ b/3rdparty/snmalloc/azure-pipelines.yml @@ -9,97 +9,123 @@ jobs: displayName: Linux pool: vmImage: 'ubuntu-18.04' - container: snmallocciteam/build_linux_x64:latest strategy: matrix: - Clang-7 Debug: + 64-bit Clang-7 Debug: CC: clang-7 CXX: clang++-7 BuildType: Debug SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Clang-7 Release: + 64-bit Clang-7 Release: CC: clang-7 CXX: clang++-7 BuildType: Release SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Clang-8 Debug: + 64-bit Clang-8 Debug: CC: clang-8 CXX: clang++-8 BuildType: Debug SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Clang-8 Release: + 64-bit Clang-8 Release: CC: clang-8 CXX: clang++-8 BuildType: Release SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Clang-9 Debug: + 64-bit Clang-9 Debug: CC: clang-9 CXX: clang++-9 BuildType: Debug SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Clang-9 Release: + 64-bit Clang-9 Release: CC: clang-9 CXX: clang++-9 BuildType: Release SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - GCC-8 Debug: + 
64-bit GCC-8 Debug: CC: gcc-8 CXX: g++-8 BuildType: Debug SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - GCC-8 Release: + 64-bit GCC-8 Release: CC: gcc-8 CXX: g++-8 BuildType: Release SelfHost: false CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Self Host: + 64-bit Self Host: CC: clang-7 CXX: clang++-7 BuildType: Debug SelfHost: true CMakeArgs: '' + Image: snmallocciteam/build_linux_x64:latest - Cache Friendly: + 64-bit Cache Friendly: CC: clang-7 CXX: clang++-7 BuildType: Debug SelfHost: false CMakeArgs: '-DCACHE_FRIENDLY_OFFSET=64' + Image: snmallocciteam/build_linux_x64:latest + 32-bit Clang-9 Debug: + CC: clang-9 + CXX: clang++-9 + BuildType: Debug + SelfHost: false + CMakeArgs: '' + Image: snmallocciteam/build_linux_x86:latest + + 32-bit Clang-9 Release: + CC: clang-9 + CXX: clang++-9 + BuildType: Release + SelfHost: false + CMakeArgs: '' + Image: snmallocciteam/build_linux_x86:latest + + container: $[ variables['Image'] ] steps: - script: | + set -eo pipefail ci/scripts/build.sh env: CC: $(CC) CXX: $(CXX) BUILD_TYPE: $(BuildType) CMAKE_ARGS: $(CMakeArgs) - failOnStderr: true displayName: 'Build' - script: | + set -eo pipefail ci/scripts/test.sh env: SELF_HOST: $(SelfHost) BUILD_TYPE: $(BuildType) - failOnStderr: true displayName: 'Test' - job: @@ -114,7 +140,7 @@ jobs: CXX: clang++-9 BuildType: Debug SelfHost: false - CMakeArgs: '' + CMakeArgs: '-DSNMALLOC_QEMU_WORKAROUND=On' Image: snmallocciteam/build_linux_arm64:latest 64-bit Clang-9 Release: @@ -122,7 +148,7 @@ jobs: CXX: clang++-9 BuildType: Release SelfHost: false - CMakeArgs: '' + CMakeArgs: '-DSNMALLOC_QEMU_WORKAROUND=On' Image: snmallocciteam/build_linux_arm64:latest 32-bit Clang-9 Debug: @@ -130,7 +156,7 @@ jobs: CXX: clang++-9 BuildType: Debug SelfHost: false - CMakeArgs: '' + CMakeArgs: '-DSNMALLOC_QEMU_WORKAROUND=On' Image: snmallocciteam/build_linux_armhf:latest 32-bit Clang-9 Release: @@ -138,7 +164,7 @@ jobs: CXX: clang++-9 BuildType: Release 
SelfHost: false - CMakeArgs: '' + CMakeArgs: '-DSNMALLOC_QEMU_WORKAROUND=On' Image: snmallocciteam/build_linux_armhf:latest steps: @@ -308,7 +334,7 @@ jobs: - script: | set -eo pipefail ninja clangformat - git diff --exit-code $(Build.SourceVersion) + git diff --exit-code workingDirectory: build failOnStderr: true diff --git a/3rdparty/snmalloc/ci/linux_x86 b/3rdparty/snmalloc/ci/linux_x86 new file mode 100644 index 0000000000..6124ddf435 --- /dev/null +++ b/3rdparty/snmalloc/ci/linux_x86 @@ -0,0 +1,7 @@ +FROM multiarch/ubuntu-core:x86-bionic + +WORKDIR /src + +RUN apt update \ + && apt install --no-install-recommends -y ninja-build clang++-9 cmake \ + && apt -y clean diff --git a/3rdparty/snmalloc/ci/scripts/test.sh b/3rdparty/snmalloc/ci/scripts/test.sh index 1d1176e509..02e2e65346 100755 --- a/3rdparty/snmalloc/ci/scripts/test.sh +++ b/3rdparty/snmalloc/ci/scripts/test.sh @@ -4,9 +4,11 @@ cd build if [ $SELF_HOST = false ]; then ctest -j 4 --output-on-failure -C $BUILD_TYPE else - sudo cp libsnmallocshim.so libsnmallocshim-1mib.so /usr/local/lib/ + sudo cp libsnmallocshim.so libsnmallocshim-16mib.so libsnmallocshim-oe.so /usr/local/lib/ ninja clean LD_PRELOAD=/usr/local/lib/libsnmallocshim.so ninja ninja clean - LD_PRELOAD=/usr/local/lib/libsnmallocshim-1mib.so ninja + LD_PRELOAD=/usr/local/lib/libsnmallocshim-16mib.so ninja + ninja clean + LD_PRELOAD=/usr/local/lib/libsnmallocshim-oe.so ninja fi \ No newline at end of file diff --git a/3rdparty/snmalloc/src/aal/aal.h b/3rdparty/snmalloc/src/aal/aal.h index 55b14778a1..ce5e6c19da 100644 --- a/3rdparty/snmalloc/src/aal/aal.h +++ b/3rdparty/snmalloc/src/aal/aal.h @@ -19,6 +19,10 @@ # define PLATFORM_IS_ARM #endif +#if defined(__powerpc__) || defined(__powerpc64__) +# define PLATFORM_IS_POWERPC +#endif + namespace snmalloc { /** @@ -36,6 +40,20 @@ namespace snmalloc * This architecture cannot access cpu cycles counters. 
*/ NoCpuCycleCounters = (1 << 1), + /** + * This architecture enforces strict pointer provenance; we bound the + * pointers given out on malloc() and friends and must, therefore retain + * internal high-privilege pointers for recycling memory on free(). + */ + StrictProvenance = (1 << 2), + }; + + enum AalName : int + { + ARM, + PowerPC, + X86, + X86_SGX, }; /** @@ -46,6 +64,31 @@ namespace snmalloc template struct AAL_Generic : Arch { + /* + * Provide a default specification of address_t as uintptr_t for Arch-es + * that support IntegerPointers. Those Arch-es without IntegerPointers + * must explicitly give their address_t. + * + * This somewhat obtuse way of spelling the defaulting is necessary so + * that all arguments to std::conditional_t are valid, even if they + * wouldn't be valid in context. One might rather wish to say + * + * std::conditional_t<..., uintptr_t, Arch::address_t> + * + * but that requires that Arch::address_t always be given, precisely + * the thing we're trying to avoid with the conditional. + */ + + struct default_address_t + { + using address_t = uintptr_t; + }; + + using address_t = typename std::conditional_t< + (Arch::aal_features & IntegerPointers) != 0, + default_address_t, + Arch>::address_t; + /** * Prefetch a specific address. 
* @@ -102,6 +145,8 @@ namespace snmalloc # include "aal_x86_sgx.h" #elif defined(PLATFORM_IS_ARM) # include "aal_arm.h" +#elif defined(PLATFORM_IS_POWERPC) +# include "aal_powerpc.h" #endif namespace snmalloc diff --git a/3rdparty/snmalloc/src/aal/aal_arm.h b/3rdparty/snmalloc/src/aal/aal_arm.h index baa690fc1c..56cdbd6b16 100644 --- a/3rdparty/snmalloc/src/aal/aal_arm.h +++ b/3rdparty/snmalloc/src/aal/aal_arm.h @@ -12,7 +12,7 @@ # endif #endif -#include +#include namespace snmalloc { /** @@ -27,6 +27,10 @@ namespace snmalloc static constexpr uint64_t aal_features = IntegerPointers | NoCpuCycleCounters; + static constexpr enum AalName aal_name = ARM; + + static constexpr size_t smallest_page_size = 0x1000; + /** * On pipelined processors, notify the core that we are in a spin loop and * that speculative execution past this point may not be a performance gain. diff --git a/3rdparty/snmalloc/src/aal/aal_powerpc.h b/3rdparty/snmalloc/src/aal/aal_powerpc.h new file mode 100644 index 0000000000..60b9f125d1 --- /dev/null +++ b/3rdparty/snmalloc/src/aal/aal_powerpc.h @@ -0,0 +1,37 @@ +#pragma once + +#if defined(__powerpc64__) +# define SNMALLOC_VA_BITS_64 +#else +# define SNMALLOC_VA_BITS_32 +#endif + +namespace snmalloc +{ + /** + * ARM-specific architecture abstraction layer. + */ + class AAL_PowerPC + { + public: + /** + * Bitmap of AalFeature flags + */ + static constexpr uint64_t aal_features = IntegerPointers; + + static constexpr enum AalName aal_name = PowerPC; + + static constexpr size_t smallest_page_size = 0x1000; + + /** + * On pipelined processors, notify the core that we are in a spin loop and + * that speculative execution past this point may not be a performance gain. 
+ */ + static inline void pause() + { + __asm__ volatile("or 27,27,27"); // "yield" + } + }; + + using AAL_Arch = AAL_PowerPC; +} // namespace snmalloc diff --git a/3rdparty/snmalloc/src/aal/aal_x86.h b/3rdparty/snmalloc/src/aal/aal_x86.h index 2473b5f38c..cc20e777a0 100644 --- a/3rdparty/snmalloc/src/aal/aal_x86.h +++ b/3rdparty/snmalloc/src/aal/aal_x86.h @@ -60,6 +60,10 @@ namespace snmalloc */ static constexpr uint64_t aal_features = IntegerPointers; + static constexpr enum AalName aal_name = X86; + + static constexpr size_t smallest_page_size = 0x1000; + /** * On pipelined processors, notify the core that we are in a spin loop and * that speculative execution past this point may not be a performance gain. diff --git a/3rdparty/snmalloc/src/aal/aal_x86_sgx.h b/3rdparty/snmalloc/src/aal/aal_x86_sgx.h index 9aa87c36fc..87808a4dea 100644 --- a/3rdparty/snmalloc/src/aal/aal_x86_sgx.h +++ b/3rdparty/snmalloc/src/aal/aal_x86_sgx.h @@ -3,8 +3,6 @@ #ifdef _MSC_VER # include # include -#else -# include #endif #if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \ @@ -28,13 +26,21 @@ namespace snmalloc */ static constexpr uint64_t aal_features = IntegerPointers; + static constexpr enum AalName aal_name = X86_SGX; + + static constexpr size_t smallest_page_size = 0x1000; + /** * On pipelined processors, notify the core that we are in a spin loop and * that speculative execution past this point may not be a performance gain. 
*/ static inline void pause() { +#ifdef _MSC_VER _mm_pause(); +#else + asm volatile("pause"); +#endif } /** @@ -42,7 +48,11 @@ namespace snmalloc */ static inline void prefetch(void* ptr) { +#ifdef _MSC_VER _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T0); +#else + asm volatile("prefetcht0 %0" ::"m"(ptr)); +#endif } /** diff --git a/3rdparty/snmalloc/src/ds/aba.h b/3rdparty/snmalloc/src/ds/aba.h index 38aaa36059..fc8e3f8f71 100644 --- a/3rdparty/snmalloc/src/ds/aba.h +++ b/3rdparty/snmalloc/src/ds/aba.h @@ -99,9 +99,11 @@ namespace snmalloc (__int64)value, (__int64*)&old); # else -# if defined(__GNUC__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) +# if defined(__GNUC__) && defined(SNMALLOC_VA_BITS_64) && \ + !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) #error You must compile with -mcx16 to enable 16-byte atomic compare and swap. # endif + Linked xchg{value, old.aba + 1}; std::atomic& addr = parent->linked; @@ -119,6 +121,7 @@ namespace snmalloc } Cmp(const Cmp&) = delete; + Cmp(Cmp&&) noexcept = default; }; // This method is used in Verona diff --git a/3rdparty/snmalloc/src/ds/address.h b/3rdparty/snmalloc/src/ds/address.h index a92ef36a68..cc73e49e03 100644 --- a/3rdparty/snmalloc/src/ds/address.h +++ b/3rdparty/snmalloc/src/ds/address.h @@ -13,7 +13,7 @@ namespace snmalloc * separated into two types, one for raw addresses and one for addresses that * can be cast back to pointers. */ - using address_t = uintptr_t; + using address_t = Aal::address_t; /** * Perform pointer arithmetic and return the adjusted pointer. @@ -42,16 +42,6 @@ namespace snmalloc return reinterpret_cast(ptr); } - /** - * Cast from an address back to a pointer of the specified type. All uses of - * this will eventually need auditing for CHERI compatibility. - */ - template - inline T* pointer_cast(address_t address) - { - return reinterpret_cast(address); - } - /** * Test if a pointer is aligned to a given size, which must be a power of * two. 
@@ -61,8 +51,7 @@ namespace snmalloc { static_assert(bits::next_pow2_const(alignment) == alignment); - return ((static_cast(address_cast(p)) | size) & (alignment - 1)) == - 0; + return ((address_cast(p) | size) & (alignment - 1)) == 0; } /** @@ -81,7 +70,8 @@ namespace snmalloc #if __has_builtin(__builtin_align_down) return static_cast(__builtin_align_down(p, alignment)); #else - return pointer_cast(bits::align_down(address_cast(p), alignment)); + return reinterpret_cast( + bits::align_down(reinterpret_cast(p), alignment)); #endif } } @@ -102,11 +92,29 @@ namespace snmalloc #if __has_builtin(__builtin_align_up) return static_cast(__builtin_align_up(p, alignment)); #else - return pointer_cast(bits::align_up(address_cast(p), alignment)); + return reinterpret_cast( + bits::align_up(reinterpret_cast(p), alignment)); #endif } } + /** + * Align a pointer down to a dynamically specified granularity, which must be + * a power of two. + */ + template + SNMALLOC_FAST_PATH T* pointer_align_down(void* p, size_t alignment) + { + SNMALLOC_ASSERT(alignment > 0); + SNMALLOC_ASSERT(bits::next_pow2(alignment) == alignment); +#if __has_builtin(__builtin_align_down) + return static_cast(__builtin_align_down(p, alignment)); +#else + return reinterpret_cast( + bits::align_down(reinterpret_cast(p), alignment)); +#endif + } + /** * Align a pointer up to a dynamically specified granularity, which must * be a power of two. 
@@ -119,7 +127,8 @@ namespace snmalloc #if __has_builtin(__builtin_align_up) return static_cast(__builtin_align_up(p, alignment)); #else - return pointer_cast(bits::align_up(address_cast(p), alignment)); + return reinterpret_cast( + bits::align_up(reinterpret_cast(p), alignment)); #endif } diff --git a/3rdparty/snmalloc/src/ds/cdllist.h b/3rdparty/snmalloc/src/ds/cdllist.h index 1bc39aad4d..7fe4795767 100644 --- a/3rdparty/snmalloc/src/ds/cdllist.h +++ b/3rdparty/snmalloc/src/ds/cdllist.h @@ -7,13 +7,8 @@ namespace snmalloc { - /** - * Special class for cyclic doubly linked non-empty linked list - * - * This code assumes there is always one element in the list. The client - * must ensure there is a sentinal element. - */ - class CDLLNode + template + class CDLLNodeBase { /** * to_next is used to handle a zero initialised data structure. @@ -22,38 +17,86 @@ namespace snmalloc */ ptrdiff_t to_next = 0; - // TODO: CHERI will need a real pointer too - // CDLLNode* next = nullptr; - CDLLNode* prev = nullptr; - - void set_next(CDLLNode* c) + protected: + void set_next(T* c) { - // TODO: CHERI will need a real pointer too - // next = c; to_next = pointer_diff_signed(this, c); } + public: + SNMALLOC_FAST_PATH bool is_empty() + { + return to_next == 0; + } + + SNMALLOC_FAST_PATH T* get_next() + { + return pointer_offset_signed(static_cast(this), to_next); + } + }; + + template + class CDLLNodeBaseNext + { + /** + * Like to_next in the pointer-less case, this version still works with + * zero-initialized data structure. To make `is_empty` work in this case, + * next is set to `nullptr` rather than `this` when the list is empty. + * + */ + + T* next = nullptr; + + protected: + void set_next(T* c) + { + next = (c == static_cast(this)) ? nullptr : c; + } + + public: + SNMALLOC_FAST_PATH bool is_empty() + { + return next == nullptr; + } + + SNMALLOC_FAST_PATH T* get_next() + { + return next == nullptr ? 
static_cast(this) : next; + } + }; + + template + using CDLLNodeParent = std::conditional_t< + aal_supports, + CDLLNodeBaseNext, + CDLLNodeBase>; + + /** + * Special class for cyclic doubly linked non-empty linked list + * + * This code assumes there is always one element in the list. The client + * must ensure there is a sentinal element. + */ + class CDLLNode : public CDLLNodeParent + { + CDLLNode* prev = nullptr; + public: /** * Single element cyclic list. This is the empty case. */ CDLLNode() { - set_next(this); + this->set_next(this); prev = this; } - SNMALLOC_FAST_PATH bool is_empty() - { - return to_next == 0; - } - /** * Removes this element from the cyclic list is it part of. */ SNMALLOC_FAST_PATH void remove() { - SNMALLOC_ASSERT(!is_empty()); + SNMALLOC_ASSERT(!this->is_empty()); debug_check(); get_next()->prev = prev; prev->set_next(get_next()); @@ -67,13 +110,6 @@ namespace snmalloc #endif } - SNMALLOC_FAST_PATH CDLLNode* get_next() - { - // TODO: CHERI will require a real pointer - // return next; - return pointer_offset_signed(this, to_next); - } - SNMALLOC_FAST_PATH CDLLNode* get_prev() { return prev; diff --git a/3rdparty/snmalloc/src/ds/defines.h b/3rdparty/snmalloc/src/ds/defines.h index f22dd64aac..7b9735912d 100644 --- a/3rdparty/snmalloc/src/ds/defines.h +++ b/3rdparty/snmalloc/src/ds/defines.h @@ -37,7 +37,7 @@ namespace snmalloc { // Forwards reference so that the platform can define how to handle errors. - void error(const char* const str); + [[noreturn]] void error(const char* const str); } // namespace snmalloc #define TOSTRING(expr) TOSTRING2(expr) diff --git a/3rdparty/snmalloc/src/ds/dllist.h b/3rdparty/snmalloc/src/ds/dllist.h index 4a7e9c7267..915ce1b98a 100644 --- a/3rdparty/snmalloc/src/ds/dllist.h +++ b/3rdparty/snmalloc/src/ds/dllist.h @@ -17,7 +17,7 @@ namespace snmalloc * are always the same, invalid pointer values with different sentinels are * always different. 
*/ - template + template constexpr bool operator==(const InvalidPointer&) { return Sentinel == OtherSentinel; @@ -27,7 +27,7 @@ namespace snmalloc * are always the same, invalid pointer values with different sentinels are * always different. */ - template + template constexpr bool operator!=(const InvalidPointer&) { return Sentinel != OtherSentinel; diff --git a/3rdparty/snmalloc/src/ds/helpers.h b/3rdparty/snmalloc/src/ds/helpers.h index b479011954..a042981b89 100644 --- a/3rdparty/snmalloc/src/ds/helpers.h +++ b/3rdparty/snmalloc/src/ds/helpers.h @@ -14,7 +14,7 @@ namespace snmalloc class Singleton { inline static std::atomic_flag flag; - inline static std::atomic initialised = false; + inline static std::atomic initialised{false}; inline static Object obj; public: diff --git a/3rdparty/snmalloc/src/ds/mpscq.h b/3rdparty/snmalloc/src/ds/mpscq.h index d5d51617f6..2b98dca4c5 100644 --- a/3rdparty/snmalloc/src/ds/mpscq.h +++ b/3rdparty/snmalloc/src/ds/mpscq.h @@ -14,7 +14,7 @@ namespace snmalloc std::is_same>::value, "T->next must be a std::atomic"); - std::atomic back = nullptr; + std::atomic back{nullptr}; T* front = nullptr; public: @@ -72,10 +72,10 @@ namespace snmalloc SNMALLOC_ASSERT(front); std::atomic_thread_fence(std::memory_order_acquire); invariant(); - return std::pair(first, true); + return {first, true}; } - return std::pair(nullptr, false); + return {nullptr, false}; } }; } // namespace snmalloc diff --git a/3rdparty/snmalloc/src/mem/address_space.h b/3rdparty/snmalloc/src/mem/address_space.h new file mode 100644 index 0000000000..2b41ac7b8c --- /dev/null +++ b/3rdparty/snmalloc/src/mem/address_space.h @@ -0,0 +1,243 @@ +#include "../ds/address.h" +#include "../ds/flaglock.h" +#include "../pal/pal.h" + +#include +namespace snmalloc +{ + /** + * Implements a power of two allocator, where all blocks are aligned to the + * same power of two as their size. This is what snmalloc uses to get + * alignment of very large sizeclasses. 
+ * + * It cannot unreserve memory, so this does not require the + * usual complexity of a buddy allocator. + */ + template + class AddressSpaceManager : public Pal + { + /** + * Stores the blocks of address space + * + * The first level of array indexes based on power of two size. + * + * The first entry ranges[n][0] is just a pointer to an address range + * of size 2^n. + * + * The second entry ranges[n][1] is a pointer to a linked list of blocks + * of this size. The final block in the list is not committed, so we commit + * on pop for this corner case. + * + * Invariants + * ranges[n][1] != nullptr => ranges[n][0] != nullptr + * + * bits::BITS is used for simplicity, we do not use below the pointer size, + * and large entries will be unlikely to be supported by the platform. + */ + std::array, bits::BITS> ranges = {}; + + /** + * This is infrequently used code, a spin lock simplifies the code + * considerably, and should never be on the fast path. + */ + std::atomic_flag spin_lock = ATOMIC_FLAG_INIT; + + /** + * Checks a block satisfies its invariant. + */ + inline void check_block(void* base, size_t align_bits) + { + SNMALLOC_ASSERT( + base == pointer_align_up(base, bits::one_at_bit(align_bits))); + // All blocks need to be bigger than a pointer. + SNMALLOC_ASSERT(bits::one_at_bit(align_bits) >= sizeof(void*)); + UNUSED(base); + UNUSED(align_bits); + } + + /** + * Adds a block to `ranges`. + */ + void add_block(size_t align_bits, void* base) + { + check_block(base, align_bits); + SNMALLOC_ASSERT(align_bits < 64); + if (ranges[align_bits][0] == nullptr) + { + // Prefer first slot if available. + ranges[align_bits][0] = base; + return; + } + + if (ranges[align_bits][1] != nullptr) + { + // Add to linked list. 
+ commit_block(base, sizeof(void*)); + *reinterpret_cast(base) = ranges[align_bits][1]; + check_block(ranges[align_bits][1], align_bits); + } + + // Update head of list + ranges[align_bits][1] = base; + check_block(ranges[align_bits][1], align_bits); + } + + /** + * Find a block of the correct size. May split larger blocks + * to satisfy this request. + */ + void* remove_block(size_t align_bits) + { + auto first = ranges[align_bits][0]; + if (first == nullptr) + { + if (align_bits == (bits::BITS - 1)) + { + // Out of memory + return nullptr; + } + + // Look for larger block and split up recursively + void* bigger = remove_block(align_bits + 1); + if (bigger != nullptr) + { + void* left_over = + pointer_offset(bigger, bits::one_at_bit(align_bits)); + ranges[align_bits][0] = left_over; + check_block(left_over, align_bits); + } + check_block(bigger, align_bits + 1); + return bigger; + } + + auto second = ranges[align_bits][1]; + if (second != nullptr) + { + commit_block(second, sizeof(void*)); + auto next = *reinterpret_cast(second); + ranges[align_bits][1] = next; + // Zero memory. Client assumes memory contains only zeros. + *reinterpret_cast(second) = nullptr; + check_block(second, align_bits); + check_block(next, align_bits); + return second; + } + + check_block(first, align_bits); + ranges[align_bits][0] = nullptr; + return first; + } + + /** + * Add a range of memory to the address space. + * Divides blocks into power of two sizes with natural alignment + */ + void add_range(void* base, size_t length) + { + // Find the minimum set of maximally aligned blocks in this range. + // Each block's alignment and size are equal. 
+ while (length >= sizeof(void*)) + { + size_t base_align_bits = bits::ctz(address_cast(base)); + size_t length_align_bits = (bits::BITS - 1) - bits::clz(length); + size_t align_bits = bits::min(base_align_bits, length_align_bits); + size_t align = bits::one_at_bit(align_bits); + + check_block(base, align_bits); + add_block(align_bits, base); + + base = pointer_offset(base, align); + length -= align; + } + } + + /** + * Commit a block of memory + */ + void commit_block(void* base, size_t size) + { + // Rounding required for sub-page allocations. + auto page_start = pointer_align_down(base); + auto page_end = + pointer_align_up(pointer_offset(base, size)); + Pal::template notify_using( + page_start, static_cast(page_end - page_start)); + } + + public: + /** + * Returns a pointer to a block of memory of the supplied size. + * The block will be committed, if specified by the template parameter. + * The returned block is guaranteed to be aligened to the size. + * + * Only request 2^n sizes, and not less than a pointer. + */ + template + void* reserve(size_t size) + { + SNMALLOC_ASSERT(bits::next_pow2(size) == size); + SNMALLOC_ASSERT(size >= sizeof(void*)); + + if constexpr (pal_supports) + { + if (size >= Pal::minimum_alloc_size) + return static_cast(this)->template reserve_aligned( + size); + } + + void* res; + { + FlagLock lock(spin_lock); + res = remove_block(bits::next_pow2_bits(size)); + if (res == nullptr) + { + // Allocation failed ask OS for more memory + void* block; + size_t block_size; + if constexpr (pal_supports) + { + block_size = Pal::minimum_alloc_size; + block = static_cast(this)->template reserve_aligned( + block_size); + } + else + { + // Need at least 2 times the space to guarantee alignment. + // Hold lock here as a race could cause additional requests to + // the Pal, and this could lead to suprious OOM. This is + // particularly bad if the Pal gives all the memory on first call. 
+ auto block_and_size = + static_cast(this)->reserve_at_least(size * 2); + block = block_and_size.first; + block_size = block_and_size.second; + + // Ensure block is pointer aligned. + if ( + pointer_align_up(block, sizeof(void*)) != block || + bits::align_up(block_size, sizeof(void*)) > block_size) + { + auto diff = + pointer_diff(block, pointer_align_up(block, sizeof(void*))); + block_size = block_size - diff; + block_size = bits::align_down(block_size, sizeof(void*)); + } + } + if (block == nullptr) + { + return nullptr; + } + add_range(block, block_size); + + // still holding lock so guaranteed to succeed. + res = remove_block(bits::next_pow2_bits(size)); + } + } + + // Don't need lock while committing pages. + if constexpr (committed) + commit_block(res, size); + + return res; + } + }; +} // namespace snmalloc diff --git a/3rdparty/snmalloc/src/mem/alloc.h b/3rdparty/snmalloc/src/mem/alloc.h index 8cd2dbb435..74380d1d78 100644 --- a/3rdparty/snmalloc/src/mem/alloc.h +++ b/3rdparty/snmalloc/src/mem/alloc.h @@ -17,6 +17,7 @@ #include "sizeclasstable.h" #include "slab.h" +#include #include namespace snmalloc @@ -284,6 +285,7 @@ namespace snmalloc UNUSED(size); return free(p); #else + SNMALLOC_ASSERT(p != nullptr); check_size(p, size); if (likely((size - 1) <= (sizeclass_to_size(NUM_SMALL_CLASSES - 1) - 1))) { @@ -393,7 +395,7 @@ namespace snmalloc } template - static address_t external_address(void* p) + static void* external_pointer(void* p) { #ifdef USE_MALLOC error("Unsupported"); @@ -422,12 +424,13 @@ namespace snmalloc return external_pointer(p, sc, slab_end); } - auto ss = address_cast(super); + auto ss = super; while (size > 64) { // This is a large alloc redirect. 
- ss = ss - (1ULL << (size - 64)); + ss = pointer_offset_signed( + ss, -(static_cast(1) << (size - 64))); size = ChunkMap::get(ss); } @@ -435,38 +438,35 @@ namespace snmalloc { if constexpr ((location == End) || (location == OnePastEnd)) // We don't know the End, so return MAX_PTR - return UINTPTR_MAX; + return pointer_offset(nullptr, UINTPTR_MAX); else // We don't know the Start, so return MIN_PTR - return 0; + return nullptr; } // This is a large alloc, mask off to the slab size. if constexpr (location == Start) return ss; else if constexpr (location == End) - return (ss + (1ULL << size) - 1ULL); + return pointer_offset(ss, (1ULL << size) - 1ULL); else - return (ss + (1ULL << size)); + return pointer_offset(ss, 1ULL << size); #endif } - template - static void* external_pointer(void* p) + private: + SNMALLOC_SLOW_PATH static size_t alloc_size_error() { - return pointer_cast(external_address(p)); + error("Not allocated by this allocator"); } - static size_t alloc_size(void* p) + public: + SNMALLOC_FAST_PATH static size_t alloc_size(const void* p) { // This must be called on an external pointer. size_t size = ChunkMap::get(address_cast(p)); - if (size == 0) - { - error("Not allocated by this allocator"); - } - else if (size == CMSuperslab) + if (likely(size == CMSuperslab)) { Superslab* super = Superslab::get(p); @@ -477,7 +477,8 @@ namespace snmalloc return sizeclass_to_size(meta.sizeclass); } - else if (size == CMMediumslab) + + if (likely(size == CMMediumslab)) { Mediumslab* slab = Mediumslab::get(p); // Reading a remote sizeclass won't fail, since the other allocator @@ -485,7 +486,12 @@ namespace snmalloc return sizeclass_to_size(slab->get_sizeclass()); } - return 1ULL << size; + if (likely(size != 0)) + { + return 1ULL << size; + } + + return alloc_size_error(); } size_t get_id() @@ -508,14 +514,9 @@ namespace snmalloc * A stub Remote object that will always be the head of this list; * never taken for further processing. 
*/ - Remote head; + Remote head{}; - Remote* last; - - RemoteList() - { - clear(); - } + Remote* last{&head}; void clear() { @@ -538,8 +539,8 @@ namespace snmalloc * need to dispatch everything, we can check if we are a real allocator * and lazily provide a real allocator. */ - int64_t capacity = 0; - RemoteList list[REMOTE_SLOTS]; + int64_t capacity{0}; + std::array list{}; /// Used to find the index into the array of queues for remote /// deallocation @@ -728,12 +729,6 @@ namespace snmalloc size_t size1 = sizeclass_to_size(sc1); size_t size2 = sizeclass_to_size(sc2); - // All medium size classes are page aligned. - if (i > NUM_SMALL_CLASSES) - { - SNMALLOC_ASSERT(is_aligned_block(nullptr, size1)); - } - SNMALLOC_ASSERT(sc1 == i); SNMALLOC_ASSERT(sc1 == sc2); SNMALLOC_ASSERT(size1 == size); @@ -823,31 +818,35 @@ namespace snmalloc } template - static uintptr_t + static void* external_pointer(void* p, sizeclass_t sizeclass, void* end_point) { size_t rsize = sizeclass_to_size(sizeclass); void* end_point_correction = location == End ? - (static_cast(end_point) - 1) : - (location == OnePastEnd ? end_point : - (static_cast(end_point) - rsize)); + pointer_offset_signed(end_point, -1) : + (location == OnePastEnd ? + end_point : + pointer_offset_signed(end_point, -static_cast(rsize))); - ptrdiff_t offset_from_end = - (static_cast(end_point) - 1) - static_cast(p); + size_t offset_from_end = + pointer_diff(p, pointer_offset_signed(end_point, -1)); - size_t end_to_end = - round_by_sizeclass(rsize, static_cast(offset_from_end)); + size_t end_to_end = round_by_sizeclass(rsize, offset_from_end); - return address_cast( - static_cast(end_point_correction) - end_to_end); + return pointer_offset_signed( + end_point_correction, -static_cast(end_to_end)); } void init_message_queue() { // Manufacture an allocation to prime the queue - // Using an actual allocation removes a conditional of a critical path. + // Using an actual allocation removes a conditional from a critical path. 
Remote* dummy = reinterpret_cast(alloc(MIN_ALLOC_SIZE)); + if (dummy == nullptr) + { + error("Critical error: Out-of-memory during initialisation."); + } dummy->set_target_id(id()); message_queue().init(dummy); } diff --git a/3rdparty/snmalloc/src/mem/allocconfig.h b/3rdparty/snmalloc/src/mem/allocconfig.h index 2e9bb6d39f..0ea5257c5f 100644 --- a/3rdparty/snmalloc/src/mem/allocconfig.h +++ b/3rdparty/snmalloc/src/mem/allocconfig.h @@ -41,20 +41,21 @@ namespace snmalloc // Specifies smaller slab and super slab sizes for address space // constrained scenarios. - static constexpr size_t ADDRESS_SPACE_CONSTRAINED = -#ifdef IS_ADDRESS_SPACE_CONSTRAINED - true -#else + static constexpr size_t USE_LARGE_CHUNKS = +#ifdef SNMALLOC_USE_LARGE_CHUNKS // In 32 bit uses smaller superslab. - (!bits::is64()) + (bits::is64()) +#else + false #endif ; - static constexpr size_t RESERVE_MULTIPLE = -#ifdef USE_RESERVE_MULTIPLE - USE_RESERVE_MULTIPLE + // Specifies even smaller slab and super slab sizes for open enclave. + static constexpr size_t USE_SMALL_CHUNKS = +#ifdef SNMALLOC_USE_SMALL_CHUNKS + true #else - bits::is64() ? 16 : 2 + false #endif ; @@ -92,18 +93,7 @@ namespace snmalloc // Used to isolate values on cache lines to prevent false sharing. static constexpr size_t CACHELINE_SIZE = 64; - // Used to keep Superslab metadata committed. - static constexpr size_t OS_PAGE_SIZE = 0x1000; static constexpr size_t PAGE_ALIGNED_SIZE = OS_PAGE_SIZE << INTERMEDIATE_BITS; - // Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h) - // define `PAGE_SIZE` as a macro. We don't use `PAGE_SIZE` as our variable - // name, to avoid conflicts, but if we do see a macro definition then check - // that our value matches the platform's expected value. -#ifdef PAGE_SIZE - static_assert( - PAGE_SIZE == OS_PAGE_SIZE, - "Page size from system header does not match snmalloc config page size."); -#endif // Minimum allocation size is space for two pointers. 
static_assert(bits::next_pow2_const(sizeof(void*)) == sizeof(void*)); @@ -111,19 +101,20 @@ namespace snmalloc static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); // Slabs are 64 KiB unless constrained to 16 KiB. - static constexpr size_t SLAB_BITS = ADDRESS_SPACE_CONSTRAINED ? 14 : 16; + static constexpr size_t SLAB_BITS = + USE_SMALL_CHUNKS ? 13 : (USE_LARGE_CHUNKS ? 16 : 14); static constexpr size_t SLAB_SIZE = 1 << SLAB_BITS; static constexpr size_t SLAB_MASK = ~(SLAB_SIZE - 1); // Superslabs are composed of this many slabs. Slab offsets are encoded as // a byte, so the maximum count is 256. This must be a power of two to // allow fast masking to find a superslab start address. - static constexpr size_t SLAB_COUNT_BITS = ADDRESS_SPACE_CONSTRAINED ? 6 : 8; + static constexpr size_t SLAB_COUNT_BITS = + USE_SMALL_CHUNKS ? 5 : (USE_LARGE_CHUNKS ? 8 : 6); static constexpr size_t SLAB_COUNT = 1 << SLAB_COUNT_BITS; static constexpr size_t SUPERSLAB_SIZE = SLAB_SIZE * SLAB_COUNT; static constexpr size_t SUPERSLAB_MASK = ~(SUPERSLAB_SIZE - 1); static constexpr size_t SUPERSLAB_BITS = SLAB_BITS + SLAB_COUNT_BITS; - static constexpr size_t RESERVE_SIZE = SUPERSLAB_SIZE * RESERVE_MULTIPLE; static_assert((1ULL << SUPERSLAB_BITS) == SUPERSLAB_SIZE, "Sanity check"); diff --git a/3rdparty/snmalloc/src/mem/largealloc.h b/3rdparty/snmalloc/src/mem/largealloc.h index 14cbd537ac..114fa3918a 100644 --- a/3rdparty/snmalloc/src/mem/largealloc.h +++ b/3rdparty/snmalloc/src/mem/largealloc.h @@ -4,6 +4,7 @@ #include "../ds/helpers.h" #include "../ds/mpmcstack.h" #include "../pal/pal.h" +#include "address_space.h" #include "allocstats.h" #include "baseslab.h" #include "sizeclass.h" @@ -58,27 +59,17 @@ namespace snmalloc template class MemoryProviderStateMixin : public PalNotificationObject, public PAL { - /** - * Flag to protect the bump allocator - */ - std::atomic_flag lock = ATOMIC_FLAG_INIT; - - /** - * Pointer to block being bump allocated - */ - void* bump = 
nullptr; - - /** - * Space remaining in this block being bump allocated - */ - size_t remaining = 0; - /** * Simple flag for checking if another instance of lazy-decommit is * running */ std::atomic_flag lazy_decommit_guard = {}; + /** + * Manages address space for this memory provider. + */ + AddressSpaceManager address_space = {}; + public: /** * Stack of large allocations that have been returned for reuse. @@ -91,12 +82,15 @@ namespace snmalloc static MemoryProviderStateMixin* make() noexcept { // Temporary stack-based storage to start the allocator in. - MemoryProviderStateMixin local; + MemoryProviderStateMixin local{}; // Allocate permanent storage for the allocator usung temporary allocator MemoryProviderStateMixin* allocated = local.alloc_chunk, 1>(); + if (allocated == nullptr) + error("Failed to initialise system!"); + #ifdef GCC_VERSION_EIGHT_PLUS # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wclass-memaccess" @@ -105,7 +99,10 @@ namespace snmalloc // memcpy is safe as this is entirely single threaded: the move // constructors were removed as unsafe to move std::atomic in a // concurrent setting. - memcpy(allocated, &local, sizeof(MemoryProviderStateMixin)); + ::memcpy( + &(allocated->address_space), + &(local.address_space), + sizeof(AddressSpaceManager)); #ifdef GCC_VERSION_EIGHT_PLUS # pragma GCC diagnostic pop #endif @@ -121,22 +118,6 @@ namespace snmalloc } private: - void new_block() - { - // Reserve the smallest large_class which is SUPERSLAB_SIZE - void* r = reserve(0); - - if (r == nullptr) - Pal::error( - "Unrecoverable internal error: \ - failed to allocator internal data structure."); - - PAL::template notify_using(r, OS_PAGE_SIZE); - - bump = r; - remaining = SUPERSLAB_SIZE; - } - SNMALLOC_SLOW_PATH void lazy_decommit() { // If another thread is try to do lazy decommit, let it continue. 
If @@ -183,25 +164,6 @@ namespace snmalloc lazy_decommit_guard.clear(); } - void push_space(address_t start, size_t large_class) - { - // All fresh pages so can use "NoZero" - void* p = pointer_cast(start); - if (large_class > 0) - PAL::template notify_using(p, OS_PAGE_SIZE); - else - { - if (decommit_strategy == DecommitSuperLazy) - { - PAL::template notify_using(p, OS_PAGE_SIZE); - p = new (p) Decommittedslab(); - } - else - PAL::template notify_using(p, SUPERSLAB_SIZE); - } - large_stack[large_class].push(reinterpret_cast(p)); - } - /*** * Method for callback object to perform lazy decommit. */ @@ -222,45 +184,10 @@ namespace snmalloc { // Cache line align size_t size = bits::align_up(sizeof(T), 64); - - void* p; - { - FlagLock f(lock); - - if constexpr (alignment != 0) - { - char* aligned_bump = pointer_align_up(bump); - - size_t bump_delta = pointer_diff(bump, aligned_bump); - - if (bump_delta > remaining) - { - new_block(); - } - else - { - remaining -= bump_delta; - bump = aligned_bump; - } - } - - if (remaining < size) - { - new_block(); - } - - p = bump; - bump = pointer_offset(bump, size); - remaining -= size; - } - - auto page_start = pointer_align_down(p); - auto page_end = - pointer_align_up(pointer_offset(p, size)); - - PAL::template notify_using( - page_start, static_cast(page_end - page_start)); - + size = bits::max(size, alignment); + void* p = address_space.template reserve(bits::next_pow2(size)); + if (p == nullptr) + return nullptr; return new (p) T(std::forward(args)...); } @@ -268,67 +195,8 @@ namespace snmalloc void* reserve(size_t large_class) noexcept { size_t size = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - size_t align = size; - if constexpr (pal_supports) - { - return PAL::template reserve(size, align); - } - else - { - // Reserve 4 times the amount, and put aligned leftovers into the - // large_stack - size_t request = bits::max(size * 4, SUPERSLAB_SIZE * 8); - void* p = PAL::template reserve(request); - - if (p == nullptr) - 
return nullptr; - - address_t p0 = address_cast(p); - address_t start = bits::align_up(p0, align); - address_t p1 = p0 + request; - address_t end = start + size; - - for (; end < bits::align_down(p1, align); end += size) - { - push_space(end, large_class); - } - - // Put offcuts before alignment into the large stack - address_t offcut_end = start; - address_t offcut_start; - for (size_t i = large_class; i > 0;) - { - i--; - size_t offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i; - offcut_start = bits::align_up(p0, offcut_align); - if (offcut_start != offcut_end) - { - push_space(offcut_start, i); - offcut_end = offcut_start; - } - } - - // Put offcuts after returned block into the large stack - offcut_start = end; - for (size_t i = large_class; i > 0;) - { - i--; - auto offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i; - offcut_end = bits::align_down(p1, offcut_align); - if (offcut_start != offcut_end) - { - push_space(offcut_start, i); - offcut_start = offcut_end; - } - } - - void* result = pointer_cast(start); - if (committed) - PAL::template notify_using(result, size); - - return result; - } + return address_space.template reserve(size); } }; @@ -366,8 +234,7 @@ namespace snmalloc p = memory_provider.template reserve(large_class); if (p == nullptr) return nullptr; - memory_provider.template notify_using( - p, bits::align_up(size, OS_PAGE_SIZE)); + memory_provider.template notify_using(p, rsize); } else { @@ -390,8 +257,7 @@ namespace snmalloc // Passing zero_mem ensures the PAL provides zeroed pages if // required. 
memory_provider.template notify_using( - pointer_offset(p, OS_PAGE_SIZE), - bits::align_up(size, OS_PAGE_SIZE) - OS_PAGE_SIZE); + pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE); } else { @@ -399,6 +265,8 @@ namespace snmalloc if constexpr (zero_mem == YesZero) memory_provider.template zero( p, bits::align_up(size, OS_PAGE_SIZE)); + else + UNUSED(size); } } diff --git a/3rdparty/snmalloc/src/mem/mediumslab.h b/3rdparty/snmalloc/src/mem/mediumslab.h index dcabf8e11d..59a10f2e02 100644 --- a/3rdparty/snmalloc/src/mem/mediumslab.h +++ b/3rdparty/snmalloc/src/mem/mediumslab.h @@ -39,9 +39,10 @@ namespace snmalloc return OS_PAGE_SIZE; } - static Mediumslab* get(void* p) + static Mediumslab* get(const void* p) { - return pointer_align_down(p); + return pointer_align_down( + const_cast(p)); } void init(RemoteAllocator* alloc, sizeclass_t sc, size_t rsize) @@ -84,11 +85,10 @@ namespace snmalloc void* p = pointer_offset(this, (static_cast(index) << 8)); free--; - SNMALLOC_ASSERT(is_aligned_block(p, OS_PAGE_SIZE)); - size = bits::align_up(size, OS_PAGE_SIZE); - if constexpr (zero_mem == YesZero) - memory_provider.template zero(p, size); + memory_provider.zero(p, size); + else + UNUSED(size); return p; } diff --git a/3rdparty/snmalloc/src/mem/metaslab.h b/3rdparty/snmalloc/src/mem/metaslab.h index 4d53b77f2b..ea4200f6d1 100644 --- a/3rdparty/snmalloc/src/mem/metaslab.h +++ b/3rdparty/snmalloc/src/mem/metaslab.h @@ -140,9 +140,9 @@ namespace snmalloc return (slab_end - allocation_start) % size == 0; } - static Slab* get_slab(void* p) + static Slab* get_slab(const void* p) { - return pointer_align_down(p); + return pointer_align_down(const_cast(p)); } static bool is_short(Slab* p) diff --git a/3rdparty/snmalloc/src/mem/pagemap.h b/3rdparty/snmalloc/src/mem/pagemap.h index fc3da85509..bc75fd39d9 100644 --- a/3rdparty/snmalloc/src/mem/pagemap.h +++ b/3rdparty/snmalloc/src/mem/pagemap.h @@ -188,7 +188,7 @@ namespace snmalloc { PagemapEntry* value = get_node(e, result); if 
(unlikely(!result)) - return std::pair(nullptr, 0); + return {nullptr, 0}; shift -= BITS_PER_INDEX_LEVEL; ix = (static_cast(addr) >> shift) & ENTRIES_MASK; @@ -208,11 +208,11 @@ namespace snmalloc Leaf* leaf = reinterpret_cast(get_node(e, result)); if (unlikely(!result)) - return std::pair(nullptr, 0); + return {nullptr, 0}; shift -= BITS_FOR_LEAF; ix = (static_cast(addr) >> shift) & LEAF_MASK; - return std::pair(leaf, ix); + return {leaf, ix}; } template diff --git a/3rdparty/snmalloc/src/mem/pooled.h b/3rdparty/snmalloc/src/mem/pooled.h index 71f8d911d0..4778a8fe10 100644 --- a/3rdparty/snmalloc/src/mem/pooled.h +++ b/3rdparty/snmalloc/src/mem/pooled.h @@ -14,7 +14,7 @@ namespace snmalloc friend class MPMCStack; /// Used by the pool for chaining together entries when not in use. - std::atomic next = nullptr; + std::atomic next{nullptr}; /// Used by the pool to keep the list of all entries ever created. T* list_next; std::atomic_flag in_use = ATOMIC_FLAG_INIT; diff --git a/3rdparty/snmalloc/src/mem/remoteallocator.h b/3rdparty/snmalloc/src/mem/remoteallocator.h index 4c1f90662e..d833a2560d 100644 --- a/3rdparty/snmalloc/src/mem/remoteallocator.h +++ b/3rdparty/snmalloc/src/mem/remoteallocator.h @@ -19,7 +19,7 @@ namespace snmalloc union { Remote* non_atomic_next; - std::atomic next = nullptr; + std::atomic next{nullptr}; }; alloc_id_t allocator_id; diff --git a/3rdparty/snmalloc/src/mem/sizeclass.h b/3rdparty/snmalloc/src/mem/sizeclass.h index 76e774e385..801192d830 100644 --- a/3rdparty/snmalloc/src/mem/sizeclass.h +++ b/3rdparty/snmalloc/src/mem/sizeclass.h @@ -1,6 +1,6 @@ #pragma once -#include "../pal/pal_consts.h" +#include "../pal/pal.h" #include "allocconfig.h" namespace snmalloc @@ -185,4 +185,17 @@ namespace snmalloc return ((alignment - 1) | (size - 1)) + 1; } + + SNMALLOC_FAST_PATH static size_t round_size(size_t size) + { + if (size > sizeclass_to_size(NUM_SIZECLASSES - 1)) + { + return bits::next_pow2(size); + } + if (size == 0) + { + size = 1; + } + 
return sizeclass_to_size(size_to_sizeclass(size)); + } } // namespace snmalloc diff --git a/3rdparty/snmalloc/src/mem/slowalloc.h b/3rdparty/snmalloc/src/mem/slowalloc.h index f2fcba1522..0ab116990b 100644 --- a/3rdparty/snmalloc/src/mem/slowalloc.h +++ b/3rdparty/snmalloc/src/mem/slowalloc.h @@ -63,6 +63,6 @@ namespace snmalloc */ inline SlowAllocator get_slow_allocator() { - return SlowAllocator{}; + return {}; } } // namespace snmalloc diff --git a/3rdparty/snmalloc/src/mem/superslab.h b/3rdparty/snmalloc/src/mem/superslab.h index 443e33f802..50e6e16304 100644 --- a/3rdparty/snmalloc/src/mem/superslab.h +++ b/3rdparty/snmalloc/src/mem/superslab.h @@ -65,9 +65,10 @@ namespace snmalloc StatusChange = 2 }; - static Superslab* get(void* p) + static Superslab* get(const void* p) { - return pointer_align_down(p); + return pointer_align_down( + const_cast(p)); } static bool is_short_sizeclass(sizeclass_t sizeclass) @@ -180,8 +181,8 @@ namespace snmalloc Slab* alloc_slab(sizeclass_t sizeclass) { uint8_t h = head; - Slab* slab = pointer_cast( - address_cast(this) + (static_cast(h) << SLAB_BITS)); + Slab* slab = pointer_offset( + reinterpret_cast(this), (static_cast(h) << SLAB_BITS)); uint8_t n = meta[h].next; diff --git a/3rdparty/snmalloc/src/override/malloc.cc b/3rdparty/snmalloc/src/override/malloc.cc index 975bd4102b..fd2ff1c5e3 100644 --- a/3rdparty/snmalloc/src/override/malloc.cc +++ b/3rdparty/snmalloc/src/override/malloc.cc @@ -9,11 +9,19 @@ using namespace snmalloc; #ifndef SNMALLOC_EXPORT # define SNMALLOC_EXPORT #endif - -#ifndef SNMALLOC_NAME_MANGLE +#ifdef SNMALLOC_STATIC_LIBRARY_PREFIX +# define __SN_CONCAT(a, b) a##b +# define __SN_EVALUATE(a, b) __SN_CONCAT(a, b) +# define SNMALLOC_NAME_MANGLE(a) \ + __SN_EVALUATE(SNMALLOC_STATIC_LIBRARY_PREFIX, a) +#elif !defined(SNMALLOC_NAME_MANGLE) # define SNMALLOC_NAME_MANGLE(a) a #endif +#ifndef MALLOC_USABLE_SIZE_QUALIFIER +# define MALLOC_USABLE_SIZE_QUALIFIER +#endif + extern "C" { SNMALLOC_EXPORT void* 
SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr) @@ -31,6 +39,11 @@ extern "C" ThreadAlloc::get_noncachable()->dealloc(ptr); } + SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(cfree)(void* ptr) + { + SNMALLOC_NAME_MANGLE(free)(ptr); + } + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size) { bool overflow = false; @@ -43,7 +56,9 @@ extern "C" return ThreadAlloc::get_noncachable()->alloc(sz); } - SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)(void* ptr) + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( + MALLOC_USABLE_SIZE_QUALIFIER void* ptr) { return Alloc::alloc_size(ptr); } @@ -75,7 +90,7 @@ extern "C" #endif size_t sz = Alloc::alloc_size(ptr); // Keep the current allocation if the given size is in the same sizeclass. - if (sz == sizeclass_to_size(size_to_sizeclass(size))) + if (sz == round_size(size)) return ptr; void* p = SNMALLOC_NAME_MANGLE(malloc)(size); diff --git a/3rdparty/snmalloc/src/override/new.cc b/3rdparty/snmalloc/src/override/new.cc index 87450e750a..70083da373 100644 --- a/3rdparty/snmalloc/src/override/new.cc +++ b/3rdparty/snmalloc/src/override/new.cc @@ -43,6 +43,8 @@ void operator delete(void* p)EXCEPTSPEC void operator delete(void* p, size_t size)EXCEPTSPEC { + if (p == nullptr) + return; ThreadAlloc::get_noncachable()->dealloc(p, size); } @@ -58,6 +60,8 @@ void operator delete[](void* p) EXCEPTSPEC void operator delete[](void* p, size_t size) EXCEPTSPEC { + if (p == nullptr) + return; ThreadAlloc::get_noncachable()->dealloc(p, size); } diff --git a/3rdparty/snmalloc/src/pal/pal.h b/3rdparty/snmalloc/src/pal/pal.h index 53faecbacf..47f30e0257 100644 --- a/3rdparty/snmalloc/src/pal/pal.h +++ b/3rdparty/snmalloc/src/pal/pal.h @@ -3,18 +3,20 @@ #include "pal_consts.h" // If simultating OE, then we need the underlying platform +#if defined(OPEN_ENCLAVE) +# include "pal_open_enclave.h" +#endif #if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION) # include "pal_apple.h" # 
include "pal_freebsd.h" # include "pal_freebsd_kernel.h" +# include "pal_haiku.h" # include "pal_linux.h" # include "pal_netbsd.h" # include "pal_openbsd.h" +# include "pal_solaris.h" # include "pal_windows.h" #endif -#if defined(OPEN_ENCLAVE) -# include "pal_open_enclave.h" -#endif #include "pal_plain.h" namespace snmalloc @@ -31,10 +33,14 @@ namespace snmalloc PALFreeBSDKernel; # elif defined(__FreeBSD__) PALFreeBSD; +# elif defined(__HAIKU__) + PALHaiku; # elif defined(__NetBSD__) PALNetBSD; # elif defined(__OpenBSD__) PALOpenBSD; +# elif defined(__sun) + PALSolaris; # else # error Unsupported platform # endif @@ -49,7 +55,7 @@ namespace snmalloc DefaultPal; #endif - SNMALLOC_SLOW_PATH inline void error(const char* const str) + [[noreturn]] SNMALLOC_SLOW_PATH inline void error(const char* const str) { Pal::error(str); } @@ -59,4 +65,25 @@ namespace snmalloc */ template constexpr static bool pal_supports = (PAL::pal_features & F) == F; + + // Used to keep Superslab metadata committed. + static constexpr size_t OS_PAGE_SIZE = Pal::page_size; + + static_assert( + bits::next_pow2_const(OS_PAGE_SIZE) == OS_PAGE_SIZE, + "OS_PAGE_SIZE must be a power of two"); + static_assert( + OS_PAGE_SIZE % Aal::smallest_page_size == 0, + "The smallest architectural page size must divide OS_PAGE_SIZE"); + + // Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h) + // define `PAGE_SIZE` as a macro. We don't use `PAGE_SIZE` as our variable + // name, to avoid conflicts, but if we do see a macro definition then check + // that our value matches the platform's expected value. 
+#ifdef PAGE_SIZE + static_assert( + PAGE_SIZE == OS_PAGE_SIZE, + "Page size from system header does not match snmalloc config page size."); +#endif + } // namespace snmalloc diff --git a/3rdparty/snmalloc/src/pal/pal_apple.h b/3rdparty/snmalloc/src/pal/pal_apple.h index b27dd43c3c..22600f815d 100644 --- a/3rdparty/snmalloc/src/pal/pal_apple.h +++ b/3rdparty/snmalloc/src/pal/pal_apple.h @@ -4,6 +4,7 @@ # include "pal_bsd.h" # include +# include namespace snmalloc { @@ -24,65 +25,16 @@ namespace snmalloc static constexpr uint64_t pal_features = PALBSD::pal_features; /** - * OS specific function for zeroing memory with the Apple application - * tag id. - * - * See comment below. - */ - template - void zero(void* p, size_t size) - { - if (page_aligned || is_aligned_block(p, size)) - { - SNMALLOC_ASSERT(is_aligned_block(p, size)); - void* r = mmap( - p, - size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - pal_anon_id, - 0); - - if (r != MAP_FAILED) - return; - } - - bzero(p, size); - } - - /** - * Reserve memory with the Apple application tag id. - * - * See comment below. - */ - template - void* reserve(size_t size) - { - void* p = mmap( - nullptr, - size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - pal_anon_id, - 0); - - if (p == MAP_FAILED) - error("Out of memory"); - - return p; - } - - private: - /** - * Anonymous page tag ID + * Anonymous page tag ID. * * Darwin platform allows to gives an ID to anonymous pages via * the VM_MAKE_TAG's macro, from 240 up to 255 are guaranteed * to be free of usage, however eventually a lower could be taken * (e.g. LLVM sanitizers has 99) so we can monitor their states - * via vmmap for instance. + * via vmmap for instance. This value is provided to `mmap` as the file + * descriptor for the mapping. 
*/ - static constexpr int pal_anon_id = VM_MAKE_TAG(PALAnonID); + static constexpr int anonymous_memory_fd = VM_MAKE_TAG(PALAnonID); }; } // namespace snmalloc #endif diff --git a/3rdparty/snmalloc/src/pal/pal_bsd.h b/3rdparty/snmalloc/src/pal/pal_bsd.h index 2641a03729..c446dcbc1e 100644 --- a/3rdparty/snmalloc/src/pal/pal_bsd.h +++ b/3rdparty/snmalloc/src/pal/pal_bsd.h @@ -33,7 +33,7 @@ namespace snmalloc */ void notify_not_using(void* p, size_t size) noexcept { - SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); madvise(p, size, MADV_FREE); } }; diff --git a/3rdparty/snmalloc/src/pal/pal_bsd_aligned.h b/3rdparty/snmalloc/src/pal/pal_bsd_aligned.h index 7ca1277b58..d5d57661b7 100644 --- a/3rdparty/snmalloc/src/pal/pal_bsd_aligned.h +++ b/3rdparty/snmalloc/src/pal/pal_bsd_aligned.h @@ -23,18 +23,19 @@ namespace snmalloc static constexpr uint64_t pal_features = AlignedAllocation | PALBSD::pal_features; + static constexpr size_t minimum_alloc_size = 4096; + /** * Reserve memory at a specific alignment. */ template - void* reserve(size_t size, size_t align) noexcept + void* reserve_aligned(size_t size) noexcept { // Alignment must be a power of 2. 
- SNMALLOC_ASSERT(align == bits::next_pow2(align)); + SNMALLOC_ASSERT(size == bits::next_pow2(size)); + SNMALLOC_ASSERT(size >= minimum_alloc_size); - align = bits::max(4096, align); - - size_t log2align = bits::next_pow2_bits(align); + size_t log2align = bits::next_pow2_bits(size); void* p = mmap( nullptr, diff --git a/3rdparty/snmalloc/src/pal/pal_consts.h b/3rdparty/snmalloc/src/pal/pal_consts.h index 538f90d863..20359cd4ca 100644 --- a/3rdparty/snmalloc/src/pal/pal_consts.h +++ b/3rdparty/snmalloc/src/pal/pal_consts.h @@ -78,7 +78,7 @@ namespace snmalloc /** * List of callbacks to notify */ - std::atomic callbacks = nullptr; + std::atomic callbacks{nullptr}; public: /** diff --git a/3rdparty/snmalloc/src/pal/pal_freebsd_kernel.h b/3rdparty/snmalloc/src/pal/pal_freebsd_kernel.h index 914b736181..826c1dc2a7 100644 --- a/3rdparty/snmalloc/src/pal/pal_freebsd_kernel.h +++ b/3rdparty/snmalloc/src/pal/pal_freebsd_kernel.h @@ -1,7 +1,6 @@ #pragma once #include "../ds/bits.h" -#include "../mem/allocconfig.h" #if defined(FreeBSD_KERNEL) extern "C" @@ -29,7 +28,7 @@ namespace snmalloc * PAL supports. */ static constexpr uint64_t pal_features = AlignedAllocation; - void error(const char* const str) + [[noreturn]] void error(const char* const str) { panic("snmalloc error: %s", str); } @@ -61,8 +60,12 @@ namespace snmalloc } template - void* reserve(size_t size, size_t align) + void* reserve_aligned(size_t size) noexcept { + SNMALLOC_ASSERT(size == bits::next_pow2(size)); + SNMALLOC_ASSERT(size >= minimum_alloc_size); + size_t align = size; + vm_offset_t addr; if (vmem_xalloc( kernel_arena, diff --git a/3rdparty/snmalloc/src/pal/pal_haiku.h b/3rdparty/snmalloc/src/pal/pal_haiku.h new file mode 100644 index 0000000000..05ba71a06f --- /dev/null +++ b/3rdparty/snmalloc/src/pal/pal_haiku.h @@ -0,0 +1,41 @@ +#pragma once + +#if defined(__HAIKU__) +# include "pal_posix.h" + +# include + +namespace snmalloc +{ + /** + * Platform abstraction layer for Haiku. 
This provides features for this + * system. + */ + class PALHaiku : public PALPOSIX + { + public: + /** + * Bitmap of PalFeatures flags indicating the optional features that this + * PAL supports. + * + */ + static constexpr uint64_t pal_features = PALPOSIX::pal_features; + + /** + * Haiku requires an explicit no-reserve flag in `mmap` to guarantee lazy + * commit. + */ + static constexpr int default_mmap_flags = MAP_NORESERVE; + + /** + * Notify platform that we will not be needing these pages. + * Haiku does not provide madvise call per say only the posix equivalent. + */ + void notify_not_using(void* p, size_t size) noexcept + { + SNMALLOC_ASSERT(is_aligned_block(p, size)); + posix_madvise(p, size, POSIX_MADV_DONTNEED); + } + }; +} // namespace snmalloc +#endif diff --git a/3rdparty/snmalloc/src/pal/pal_linux.h b/3rdparty/snmalloc/src/pal/pal_linux.h index d4c20a7aa9..a645c163ca 100644 --- a/3rdparty/snmalloc/src/pal/pal_linux.h +++ b/3rdparty/snmalloc/src/pal/pal_linux.h @@ -2,7 +2,6 @@ #if defined(__linux__) # include "../ds/bits.h" -# include "../mem/allocconfig.h" # include "pal_posix.h" # include @@ -26,6 +25,9 @@ namespace snmalloc */ static constexpr uint64_t pal_features = PALPOSIX::pal_features; + static constexpr size_t page_size = + Aal::aal_name == PowerPC ? 0x10000 : 0x1000; + /** * OS specific function for zeroing memory. * @@ -41,12 +43,12 @@ namespace snmalloc // MADV_DONTNEED. switch back to memset only for QEMU. # ifndef SNMALLOC_QEMU_WORKAROUND if ( - (page_aligned || is_aligned_block(p, size)) && - (size > SLAB_SIZE)) + (page_aligned || is_aligned_block(p, size)) && + (size > 16 * page_size)) { // Only use this on large allocations as memset faster, and doesn't // introduce IPI so faster for small allocations. 
- SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); madvise(p, size, MADV_DONTNEED); } else diff --git a/3rdparty/snmalloc/src/pal/pal_open_enclave.h b/3rdparty/snmalloc/src/pal/pal_open_enclave.h index 58de146284..0183c9f9cd 100644 --- a/3rdparty/snmalloc/src/pal/pal_open_enclave.h +++ b/3rdparty/snmalloc/src/pal/pal_open_enclave.h @@ -1,55 +1,64 @@ #pragma once +#include "ds/address.h" +#include "ds/flaglock.h" #include "pal_plain.h" + +#include #ifdef OPEN_ENCLAVE -extern "C" const void* __oe_get_heap_base(); -extern "C" const void* __oe_get_heap_end(); extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size); -extern "C" void oe_abort(); +extern "C" [[noreturn]] void oe_abort(); namespace snmalloc { class PALOpenEnclave { - std::atomic oe_base = nullptr; + /// Base of OE heap + static inline void* heap_base = nullptr; + + /// Size of OE heap + static inline size_t heap_size; + + // This is infrequently used code, a spin lock simplifies the code + // considerably, and should never be on the fast path. + static inline std::atomic_flag spin_lock; public: + /** + * This will be called by oe_allocator_init to set up enclave heap bounds. + */ + static void setup_initial_range(void* base, void* end) + { + heap_size = pointer_diff(base, end); + heap_base = base; + } + /** * Bitmap of PalFeatures flags indicating the optional features that this * PAL supports. */ static constexpr uint64_t pal_features = 0; - static void error(const char* const str) + + static constexpr size_t page_size = 0x1000; + + [[noreturn]] static void error(const char* const str) { UNUSED(str); oe_abort(); } - template - void* reserve(size_t size) noexcept + static std::pair + reserve_at_least(size_t request_size) noexcept { - if (oe_base == 0) - { - void* dummy = NULL; - // If this CAS fails then another thread has initialised this. 
- oe_base.compare_exchange_strong( - dummy, const_cast(__oe_get_heap_base())); - } + // First call returns the entire address space + // subsequent calls return {nullptr, 0} + FlagLock lock(spin_lock); + if (request_size > heap_size) + return {nullptr, 0}; - void* old_base = oe_base; - void* next_base; - auto end = __oe_get_heap_end(); - do - { - auto new_base = old_base; - next_base = pointer_offset(new_base, size); - - if (next_base > end) - return nullptr; - - } while (!oe_base.compare_exchange_strong(old_base, next_base)); - - return old_base; + auto result = std::make_pair(heap_base, heap_size); + heap_size = 0; + return result; } template diff --git a/3rdparty/snmalloc/src/pal/pal_plain.h b/3rdparty/snmalloc/src/pal/pal_plain.h index b55edb4a58..a7cd199907 100644 --- a/3rdparty/snmalloc/src/pal/pal_plain.h +++ b/3rdparty/snmalloc/src/pal/pal_plain.h @@ -1,7 +1,6 @@ #pragma once #include "../ds/bits.h" -#include "../mem/allocconfig.h" namespace snmalloc { diff --git a/3rdparty/snmalloc/src/pal/pal_posix.h b/3rdparty/snmalloc/src/pal/pal_posix.h index 0289e001fd..03d39ccce3 100644 --- a/3rdparty/snmalloc/src/pal/pal_posix.h +++ b/3rdparty/snmalloc/src/pal/pal_posix.h @@ -1,14 +1,16 @@ #pragma once #include "../ds/address.h" -#include "../mem/allocconfig.h" - -#include +#if defined(BACKTRACE_HEADER) +# include BACKTRACE_HEADER +#endif #include #include #include +#include #include #include +#include extern "C" int puts(const char* str); @@ -29,6 +31,64 @@ namespace snmalloc template class PALPOSIX { + /** + * Helper class to access the `default_mmap_flags` field of `OS` if one + * exists or a default value if not. This provides the default version, + * which is used if `OS::default_mmap_flags` does not exist. + */ + template + struct DefaultMMAPFlags + { + /** + * If `OS::default_mmap_flags` does not exist, use 0. This value is + * or'd with the other mmap flags and so a value of 0 is a no-op. 
+ */ + static const int flags = 0; + }; + + /** + * Helper class to access the `default_mmap_flags` field of `OS` if one + * exists or a default value if not. This provides the version that + * accesses the field, allowing other PALs to provide extra arguments to + * the `mmap` calls used here. + */ + template + struct DefaultMMAPFlags + { + static const int flags = T::default_mmap_flags; + }; + + /** + * Helper class to allow `OS` to provide the file descriptor used for + * anonymous memory. This is the default version, which provides the POSIX + * default of -1. + */ + template + struct AnonFD + { + /** + * If `OS::anonymous_memory_fd` does not exist, use -1. This value is + * defined by POSIX. + */ + static const int fd = -1; + }; + + /** + * Helper class to allow `OS` to provide the file descriptor used for + * anonymous memory. This exposes the `anonymous_memory_fd` field in `OS`. + */ + template + struct AnonFD + { + /** + * The PAL's provided file descriptor for anonymous memory. This is + * used, for example, on Apple platforms, which use the file descriptor + * in a `MAP_ANONYMOUS` mapping to encode metadata about the owner of the + * mapping. + */ + static const int fd = T::anonymous_memory_fd; + }; + public: /** * Bitmap of PalFeatures flags indicating the optional features that this @@ -38,8 +98,11 @@ namespace snmalloc */ static constexpr uint64_t pal_features = LazyCommit; + static constexpr size_t page_size = 0x1000; + static void print_stack_trace() { +#ifdef BACKTRACE_HEADER constexpr int SIZE = 1024; void* buffer[SIZE]; auto nptrs = backtrace(buffer, SIZE); @@ -47,12 +110,13 @@ namespace snmalloc backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO); puts(""); fflush(stdout); +#endif } /** * Report a fatal error an exit. 
*/ - static void error(const char* const str) noexcept + [[noreturn]] static void error(const char* const str) noexcept { puts(str); print_stack_trace(); @@ -69,7 +133,7 @@ namespace snmalloc */ void notify_not_using(void* p, size_t size) noexcept { - SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); #ifdef USE_POSIX_COMMIT_CHECKS mprotect(p, size, PROT_NONE); #else @@ -89,7 +153,7 @@ namespace snmalloc void notify_using(void* p, size_t size) noexcept { SNMALLOC_ASSERT( - is_aligned_block(p, size) || (zero_mem == NoZero)); + is_aligned_block(p, size) || (zero_mem == NoZero)); #ifdef USE_POSIX_COMMIT_CHECKS mprotect(p, size, PROT_READ | PROT_WRITE); @@ -116,15 +180,15 @@ namespace snmalloc template void zero(void* p, size_t size) noexcept { - if (page_aligned || is_aligned_block(p, size)) + if (page_aligned || is_aligned_block(p, size)) { - SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); void* r = mmap( p, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - -1, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | DefaultMMAPFlags::flags, + AnonFD::fd, 0); if (r != MAP_FAILED) @@ -143,21 +207,32 @@ namespace snmalloc * POSIX does not define a portable interface for specifying alignment * greater than a page. */ - template - void* reserve(size_t size) noexcept + std::pair reserve_at_least(size_t size) noexcept { - void* p = mmap( - nullptr, - size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, - 0); + SNMALLOC_ASSERT(size == bits::next_pow2(size)); - if (p == MAP_FAILED) - OS::error("Out of memory"); + // Magic number for over-allocating chosen by the Pal + // These should be further refined based on experiments. + constexpr size_t min_size = + bits::is64() ? 
bits::one_at_bit(32) : bits::one_at_bit(28); - return p; + for (size_t size_request = bits::max(size, min_size); + size_request >= size; + size_request = size_request / 2) + { + void* p = mmap( + nullptr, + size_request, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | DefaultMMAPFlags::flags, + AnonFD::fd, + 0); + + if (p != MAP_FAILED) + return {p, size_request}; + } + + OS::error("Out of memory"); } }; } // namespace snmalloc diff --git a/3rdparty/snmalloc/src/pal/pal_solaris.h b/3rdparty/snmalloc/src/pal/pal_solaris.h new file mode 100644 index 0000000000..5965760394 --- /dev/null +++ b/3rdparty/snmalloc/src/pal/pal_solaris.h @@ -0,0 +1,29 @@ +#pragma once + +#if defined(__sun) +# include "pal_posix.h" + +namespace snmalloc +{ + /** + * Platform abstraction layer for Solaris. This provides features for this + * system. + */ + class PALSolaris : public PALPOSIX + { + public: + /** + * Bitmap of PalFeatures flags indicating the optional features that this + * PAL supports. + * + */ + static constexpr uint64_t pal_features = PALPOSIX::pal_features; + + /** + * Solaris requires an explicit no-reserve flag in `mmap` to guarantee lazy + * commit. + */ + static constexpr int default_mmap_flags = MAP_NORESERVE; + }; +} // namespace snmalloc +#endif diff --git a/3rdparty/snmalloc/src/pal/pal_windows.h b/3rdparty/snmalloc/src/pal/pal_windows.h index ba7fcea7e1..b4cec0989b 100644 --- a/3rdparty/snmalloc/src/pal/pal_windows.h +++ b/3rdparty/snmalloc/src/pal/pal_windows.h @@ -2,7 +2,6 @@ #include "../ds/address.h" #include "../ds/bits.h" -#include "../mem/allocconfig.h" #ifdef _WIN32 # ifndef _MSC_VER @@ -78,11 +77,15 @@ namespace snmalloc * PAL supports. This PAL supports low-memory notifications. 
*/ static constexpr uint64_t pal_features = LowMemoryNotification -# if defined(PLATFORM_HAS_VIRTUALALLOC2) +# if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING) | AlignedAllocation # endif ; + static constexpr size_t minimum_alloc_size = 0x10000; + + static constexpr size_t page_size = 0x1000; + /** * Check whether the low memory state is still in effect. This is an * expensive operation and should not be on any fast paths. @@ -105,7 +108,7 @@ namespace snmalloc low_memory_callbacks.register_notification(callback); } - static void error(const char* const str) + [[noreturn]] static void error(const char* const str) { puts(str); fflush(stdout); @@ -115,7 +118,7 @@ namespace snmalloc /// Notify platform that we will not be using these pages void notify_not_using(void* p, size_t size) noexcept { - SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); BOOL ok = VirtualFree(p, size, MEM_DECOMMIT); @@ -128,7 +131,7 @@ namespace snmalloc void notify_using(void* p, size_t size) noexcept { SNMALLOC_ASSERT( - is_aligned_block(p, size) || (zero_mem == NoZero)); + is_aligned_block(p, size) || (zero_mem == NoZero)); void* r = VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE); @@ -140,9 +143,9 @@ namespace snmalloc template void zero(void* p, size_t size) noexcept { - if (page_aligned || is_aligned_block(p, size)) + if (page_aligned || is_aligned_block(p, size)) { - SNMALLOC_ASSERT(is_aligned_block(p, size)); + SNMALLOC_ASSERT(is_aligned_block(p, size)); notify_not_using(p, size); notify_using(p, size); } @@ -156,13 +159,16 @@ namespace snmalloc static size_t bump_ptr = (size_t)0x4000'0000'0000; return bump_ptr; } - template - void* reserve(size_t size) noexcept - { - DWORD flags = MEM_RESERVE; - if (committed) - flags |= MEM_COMMIT; + std::pair reserve_at_least(size_t size) noexcept + { + // Magic number for over-allocating chosen by the Pal + // These should be further refined based on experiments. 
+ constexpr size_t min_size = + bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28); + auto size_request = bits::max(size, min_size); + + DWORD flags = MEM_RESERVE; size_t retries = 1000; void* p; @@ -170,34 +176,30 @@ namespace snmalloc do { p = VirtualAlloc( - (void*)systematic_bump_ptr(), size, flags, PAGE_READWRITE); + (void*)systematic_bump_ptr(), size_request, flags, PAGE_READWRITE); - systematic_bump_ptr() += size; + systematic_bump_ptr() += size_request; retries--; } while (p == nullptr && retries > 0); - return p; + return {p, size_request}; } # elif defined(PLATFORM_HAS_VIRTUALALLOC2) template - void* reserve(size_t size, size_t align) noexcept + void* reserve_aligned(size_t size) noexcept { + SNMALLOC_ASSERT(size == bits::next_pow2(size)); + SNMALLOC_ASSERT(size >= minimum_alloc_size); + DWORD flags = MEM_RESERVE; if (committed) flags |= MEM_COMMIT; - // Windows doesn't let you request memory less than 64KB aligned. Most - // operating systems will simply give you something more aligned than you - // ask for, but Windows complains about invalid parameters. - const size_t min_align = 64 * 1024; - if (align < min_align) - align = min_align; - // If we're on Windows 10 or newer, we can use the VirtualAlloc2 // function. The FromApp variant is useable by UWP applications and // cannot allocate executable memory. 
- MEM_ADDRESS_REQUIREMENTS addressReqs = {NULL, NULL, align}; + MEM_ADDRESS_REQUIREMENTS addressReqs = {NULL, NULL, size}; MEM_EXTENDED_PARAMETER param = { {MemExtendedParameterAddressRequirements, 0}, {0}}; @@ -214,20 +216,26 @@ namespace snmalloc return ret; } # else - template - void* reserve(size_t size) noexcept + std::pair reserve_at_least(size_t size) noexcept { - DWORD flags = MEM_RESERVE; + SNMALLOC_ASSERT(size == bits::next_pow2(size)); - if (committed) - flags |= MEM_COMMIT; - - void* ret = VirtualAlloc(nullptr, size, flags, PAGE_READWRITE); - if (ret == nullptr) + // Magic number for over-allocating chosen by the Pal + // These should be further refined based on experiments. + constexpr size_t min_size = + bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28); + for (size_t size_request = bits::max(size, min_size); + size_request >= size; + size_request = size_request / 2) { - error("Failed to allocate memory\n"); + void* ret = + VirtualAlloc(nullptr, size_request, MEM_RESERVE, PAGE_READWRITE); + if (ret != nullptr) + { + return std::pair(ret, size_request); + } } - return ret; + error("Failed to allocate memory\n"); } # endif }; diff --git a/3rdparty/snmalloc/src/test/func/fixed_region/fixed_region.cc b/3rdparty/snmalloc/src/test/func/fixed_region/fixed_region.cc index 6c96797807..8f25c57560 100644 --- a/3rdparty/snmalloc/src/test/func/fixed_region/fixed_region.cc +++ b/3rdparty/snmalloc/src/test/func/fixed_region/fixed_region.cc @@ -1,7 +1,6 @@ #define SNMALLOC_SGX #define OPEN_ENCLAVE #define OPEN_ENCLAVE_SIMULATION -#define USE_RESERVE_MULTIPLE 1 #include #include @@ -10,18 +9,6 @@ #endif #define assert please_use_SNMALLOC_ASSERT -void* oe_base; -void* oe_end; -extern "C" const void* __oe_get_heap_base() -{ - return oe_base; -} - -extern "C" const void* __oe_get_heap_end() -{ - return oe_end; -} - extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size) { UNUSED(p_size); @@ -43,8 +30,9 @@ int main() // For 1MiB superslabs, 
SUPERSLAB_BITS + 4 is not big enough for the example. size_t large_class = 28 - SUPERSLAB_BITS; size_t size = 1ULL << (SUPERSLAB_BITS + large_class); - oe_base = mp.reserve(large_class); - oe_end = (uint8_t*)oe_base + size; + void* oe_base = mp.reserve(large_class); + void* oe_end = (uint8_t*)oe_base + size; + PALOpenEnclave::setup_initial_range(oe_base, oe_end); std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl; auto a = ThreadAlloc::get(); diff --git a/3rdparty/snmalloc/src/test/func/malloc/malloc.cc b/3rdparty/snmalloc/src/test/func/malloc/malloc.cc index 3ed01ed371..029428d74a 100644 --- a/3rdparty/snmalloc/src/test/func/malloc/malloc.cc +++ b/3rdparty/snmalloc/src/test/func/malloc/malloc.cc @@ -15,22 +15,44 @@ void check_result(size_t size, size_t align, void* p, int err, bool null) { if (p != nullptr) abort(); - } - else - { - if (our_malloc_usable_size(p) < size) - abort(); - - if (static_cast(reinterpret_cast(p) % align) != 0) - abort(); our_free(p); + return; } + + const auto alloc_size = our_malloc_usable_size(p); + const auto expected_size = round_size(size); + if ((align == 1) && (alloc_size != expected_size)) + { + printf( + "Usable size is %zu, but required to be %zu.\n", + alloc_size, + expected_size); + abort(); + } + if ((align != 1) && (alloc_size < expected_size)) + { + printf( + "Usable size is %zu, but required to be at least %zu.\n", + alloc_size, + expected_size); + abort(); + } + if (static_cast(reinterpret_cast(p) % align) != 0) + { + printf( + "Address is 0x%zx, but required to be aligned to 0x%zx.\n", + reinterpret_cast(p), + align); + abort(); + } + + our_free(p); } void test_calloc(size_t nmemb, size_t size, int err, bool null) { - fprintf(stderr, "calloc(%d, %d)\n", (int)nmemb, (int)size); + fprintf(stderr, "calloc(%zu, %zu)\n", nmemb, size); errno = 0; void* p = our_calloc(nmemb, size); @@ -47,7 +69,11 @@ void test_calloc(size_t nmemb, size_t size, int err, bool null) void test_realloc(void* p, size_t size, 
int err, bool null) { - fprintf(stderr, "realloc(%p(%d), %d)\n", p, int(size), (int)size); + size_t old_size = 0; + if (p != nullptr) + old_size = our_malloc_usable_size(p); + + fprintf(stderr, "realloc(%p(%zu), %zu)\n", p, old_size, size); errno = 0; auto new_p = our_realloc(p, size); // Realloc failure case, deallocate original block @@ -58,7 +84,7 @@ void test_realloc(void* p, size_t size, int err, bool null) void test_posix_memalign(size_t size, size_t align, int err, bool null) { - fprintf(stderr, "posix_memalign(&p, %d, %d)\n", (int)align, (int)size); + fprintf(stderr, "posix_memalign(&p, %zu, %zu)\n", align, size); void* p = nullptr; errno = our_posix_memalign(&p, align, size); check_result(size, align, p, err, null); @@ -66,7 +92,7 @@ void test_posix_memalign(size_t size, size_t align, int err, bool null) void test_memalign(size_t size, size_t align, int err, bool null) { - fprintf(stderr, "memalign(%d, %d)\n", (int)align, (int)size); + fprintf(stderr, "memalign(%zu, %zu)\n", align, size); errno = 0; void* p = our_memalign(align, size); check_result(size, align, p, err, null); @@ -81,9 +107,19 @@ int main(int argc, char** argv) constexpr int SUCCESS = 0; + test_realloc(our_malloc(64), 4194304, SUCCESS, false); + + for (sizeclass_t sc = 0; sc < (SUPERSLAB_BITS + 4); sc++) + { + const size_t size = 1ULL << sc; + printf("malloc: %zu\n", size); + check_result(size, 1, our_malloc(size), SUCCESS, false); + check_result(size + 1, 1, our_malloc(size + 1), SUCCESS, false); + } + test_calloc(0, 0, SUCCESS, false); - for (snmalloc::sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) { const size_t size = sizeclass_to_size(sc); @@ -97,11 +133,37 @@ int main(int argc, char** argv) test_calloc(n, 0, SUCCESS, false); } test_calloc(0, size, SUCCESS, false); + } + for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + { + const size_t size = sizeclass_to_size(sc); test_realloc(our_malloc(size), size, SUCCESS, false); 
test_realloc(our_malloc(size), 0, SUCCESS, true); test_realloc(nullptr, size, SUCCESS, false); test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true); + for (sizeclass_t sc2 = 0; sc2 < NUM_SIZECLASSES; sc2++) + { + const size_t size2 = sizeclass_to_size(sc2); + test_realloc(our_malloc(size), size2, SUCCESS, false); + test_realloc(our_malloc(size + 1), size2, SUCCESS, false); + } + } + + for (sizeclass_t sc = 0; sc < (SUPERSLAB_BITS + 4); sc++) + { + const size_t size = 1ULL << sc; + test_realloc(our_malloc(size), size, SUCCESS, false); + test_realloc(our_malloc(size), 0, SUCCESS, true); + test_realloc(nullptr, size, SUCCESS, false); + test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true); + for (sizeclass_t sc2 = 0; sc2 < (SUPERSLAB_BITS + 4); sc2++) + { + const size_t size2 = 1ULL << sc2; + printf("size1: %zu, size2:%zu\n", size, size2); + test_realloc(our_malloc(size), size2, SUCCESS, false); + test_realloc(our_malloc(size + 1), size2, SUCCESS, false); + } } test_posix_memalign(0, 0, EINVAL, true); @@ -111,7 +173,7 @@ int main(int argc, char** argv) for (size_t align = sizeof(uintptr_t); align <= SUPERSLAB_SIZE * 8; align <<= 1) { - for (snmalloc::sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) { const size_t size = sizeclass_to_size(sc); test_posix_memalign(size, align, SUCCESS, false); diff --git a/3rdparty/snmalloc/src/test/func/memory/memory.cc b/3rdparty/snmalloc/src/test/func/memory/memory.cc index 8a0d122a19..64cdf125b2 100644 --- a/3rdparty/snmalloc/src/test/func/memory/memory.cc +++ b/3rdparty/snmalloc/src/test/func/memory/memory.cc @@ -4,9 +4,76 @@ #include #include #include +#if defined(__linux__) && !defined(SNMALLOC_QEMU_WORKAROUND) +/* + * We only test allocations with limited AS on linux for now. + * It should be a good representative for POSIX systems. + * QEMU `setrlimit64` does not behave as the same as native linux, + * so we need to exclude it from such tests. 
+ */ +# include +# include +# include +# include +# define TEST_LIMITED +# define KiB (1024ull) +# define MiB (KiB * KiB) +# define GiB (KiB * MiB) +#endif using namespace snmalloc; +#ifdef TEST_LIMITED +void test_limited(rlim64_t as_limit, size_t& count) +{ + auto pid = fork(); + if (!pid) + { + auto limit = rlimit64{.rlim_cur = as_limit, .rlim_max = RLIM64_INFINITY}; + if (setrlimit64(RLIMIT_AS, &limit)) + { + std::abort(); + } + if (getrlimit64(RLIMIT_AS, &limit)) + { + std::abort(); + } + std::cout << "limiting memory to " << limit.rlim_cur / KiB << " KiB" + << std::endl; + struct sysinfo info + {}; + if (sysinfo(&info)) + { + std::abort(); + } + std::cout << "host freeram: " << info.freeram / KiB << " KiB" << std::endl; + // set the allocation size to the minimum value among: + // 2GiB, 1/8 of the AS limit, 1/8 of the Free RAM + auto upper_bound = + std::min(static_cast(limit.rlim_cur >> 3u), 2 * GiB); + upper_bound = std::min( + upper_bound, static_cast(info.freeram >> 3u)); + std::cout << "trying to alloc " << upper_bound / KiB << " KiB" << std::endl; + auto alloc = ThreadAlloc::get(); + std::cout << "allocator initialised" << std::endl; + auto chunk = alloc->alloc(upper_bound); + alloc->dealloc(chunk); + std::cout << "success" << std::endl; + std::exit(0); + } + else + { + int status; + waitpid(pid, &status, 0); + if (status) + { + std::cout << "failed" << std::endl; + count++; + } + } +} +#endif + void test_alloc_dealloc_64k() { auto alloc = ThreadAlloc::get(); @@ -221,7 +288,7 @@ void test_external_pointer_large() for (size_t i = 0; i < count; i++) { - size_t b = snmalloc::bits::is64() ? 
28 : 26; + size_t b = SUPERSLAB_BITS + 3; size_t rand = r.next() & ((1 << b) - 1); size_t size = (1 << 24) + rand; total_size += size; @@ -313,7 +380,20 @@ void test_calloc_large_bug() int main(int argc, char** argv) { setup(); - +#ifdef TEST_LIMITED + size_t count = 0; + test_limited(512 * MiB, count); + test_limited(2 * GiB, count); + test_limited( + 8 * + GiB, // 8 * GiB is large enough for a loose upper-bound of our allocations + count); + if (count) + { + std::cout << count << " attempts failed out of 3" << std::endl; + std::abort(); + } +#endif #ifdef USE_SYSTEMATIC_TESTING opt::Opt opt(argc, argv); size_t seed = opt.is("--seed", 0); @@ -333,6 +413,5 @@ int main(int argc, char** argv) test_external_pointer(); test_alloc_16M(); test_calloc_16M(); - return 0; } diff --git a/3rdparty/snmalloc/src/test/func/sizeclass/sizeclass.cc b/3rdparty/snmalloc/src/test/func/sizeclass/sizeclass.cc index 757d3a6e4e..7d6eaf817a 100644 --- a/3rdparty/snmalloc/src/test/func/sizeclass/sizeclass.cc +++ b/3rdparty/snmalloc/src/test/func/sizeclass/sizeclass.cc @@ -18,7 +18,7 @@ void test_align_size() size < snmalloc::sizeclass_to_size(snmalloc::NUM_SIZECLASSES - 1); size++) { - size_t rsize = snmalloc::sizeclass_to_size(size_to_sizeclass(size)); + size_t rsize = snmalloc::round_size(size); if (rsize < size) { diff --git a/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc1.cc b/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc1.cc index c86dcde02e..0f3a9cb717 100644 --- a/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc1.cc +++ b/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc1.cc @@ -1,11 +1,14 @@ -#undef IS_ADDRESS_SPACE_CONSTRAINED +#undef SNMALLOC_USE_LARGE_CHUNKS #define OPEN_ENCLAVE #define OPEN_ENCLAVE_SIMULATION -#define USE_RESERVE_MULTIPLE 1 #define NO_BOOTSTRAP_ALLOCATOR -#define IS_ADDRESS_SPACE_CONSTRAINED #define SNMALLOC_EXPOSE_PAGEMAP #define SNMALLOC_NAME_MANGLE(a) enclave_##a // Redefine the namespace, so we can have two versions. 
#define snmalloc snmalloc_enclave #include "../../../override/malloc.cc" + +extern "C" void oe_allocator_init(void* base, void* end) +{ + snmalloc_enclave::PALOpenEnclave::setup_initial_range(base, end); +} diff --git a/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc2.cc b/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc2.cc index 5338054beb..21316ebd94 100644 --- a/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc2.cc +++ b/3rdparty/snmalloc/src/test/func/two_alloc_types/alloc2.cc @@ -1,4 +1,7 @@ -#undef IS_ADDRESS_SPACE_CONSTRAINED +// Remove parameters feed from test harness +#undef SNMALLOC_USE_LARGE_CHUNKS +#undef SNMALLOC_USE_SMALL_CHUNKS + #define SNMALLOC_NAME_MANGLE(a) host_##a #define NO_BOOTSTRAP_ALLOCATOR #define SNMALLOC_EXPOSE_PAGEMAP diff --git a/3rdparty/snmalloc/src/test/func/two_alloc_types/main.cc b/3rdparty/snmalloc/src/test/func/two_alloc_types/main.cc index 341f32dc7c..1ef9d1cd11 100644 --- a/3rdparty/snmalloc/src/test/func/two_alloc_types/main.cc +++ b/3rdparty/snmalloc/src/test/func/two_alloc_types/main.cc @@ -5,18 +5,6 @@ #include #include -void* oe_base; -void* oe_end; -extern "C" const void* __oe_get_heap_base() -{ - return oe_base; -} - -extern "C" const void* __oe_get_heap_end() -{ - return oe_end; -} - extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size) { UNUSED(p_size); @@ -28,6 +16,7 @@ extern "C" void oe_abort() abort(); } +extern "C" void oe_allocator_init(void* base, void* end); extern "C" void* host_malloc(size_t); extern "C" void host_free(void*); @@ -51,8 +40,9 @@ int main() // For 1MiB superslabs, SUPERSLAB_BITS + 2 is not big enough for the example. 
size_t large_class = 26 - SUPERSLAB_BITS; size_t size = 1ULL << (SUPERSLAB_BITS + large_class); - oe_base = mp.reserve(large_class); - oe_end = (uint8_t*)oe_base + size; + void* oe_base = mp.reserve(large_class); + void* oe_end = (uint8_t*)oe_base + size; + oe_allocator_init(oe_base, oe_end); std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl; // Call these functions to trigger asserts if the cast-to-self doesn't work. diff --git a/3rdparty/snmalloc/src/test/perf/contention/contention.cc b/3rdparty/snmalloc/src/test/perf/contention/contention.cc index 845c4d2abb..7506052fed 100644 --- a/3rdparty/snmalloc/src/test/perf/contention/contention.cc +++ b/3rdparty/snmalloc/src/test/perf/contention/contention.cc @@ -86,7 +86,7 @@ void test_tasks_f(size_t id) *res = size; size_t* out = - contention[n % swapsize].exchange(res, std::memory_order_relaxed); + contention[n % swapsize].exchange(res, std::memory_order_acq_rel); if (out != nullptr) { diff --git a/cgmanifest.json b/cgmanifest.json index 73e993f357..2c51da102c 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -133,7 +133,7 @@ "type": "git", "git": { "repositoryUrl": "https://github.com/microsoft/snmalloc", - "commitHash": "2b92574123db1d1674f3df9c6dfb3ec10f05ebb3" + "commitHash": "4e1f5829a754ab9c170ec090979d3a670d2d5d1a" } } },