This change introduces a new approach to managing address space.
It wraps the Pal with a power-of-two reservation system that
guarantees all returned blocks are naturally aligned to their size. It
either lets the Pal perform aligned requests directly, or over-allocates
and splits the result into power-of-two blocks.
Matthew Parkinson 2020-06-22 12:36:40 +01:00 (commit made via GitHub)
Parent e393ac882f
Commit e16f2aff6f
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
9 changed files: 353 additions and 282 deletions
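The heart of the change is carving an arbitrary address range into the minimal set of blocks whose size is a power of two and whose address is aligned to that same size. As a rough standalone illustration of that carving (not code from this commit; it assumes a 64-bit platform and the GCC/Clang `__builtin_ctzll`/`__builtin_clzll` intrinsics):

```c++
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Split [base, base + length) into naturally aligned power-of-two blocks,
// mirroring the idea behind add_range in the new AddressSpaceManager.
void split_into_aligned_blocks(uintptr_t base, size_t length)
{
  while (length >= sizeof(void*))
  {
    size_t align_from_base = __builtin_ctzll(base);
    size_t align_from_length = 63 - __builtin_clzll(length);
    size_t align_bits =
      align_from_base < align_from_length ? align_from_base : align_from_length;
    size_t block = size_t(1) << align_bits;
    printf("block at %#zx, size %#zx\n", static_cast<size_t>(base), block);
    base += block;
    length -= block;
  }
}

int main()
{
  // A 48KiB range whose start is aligned only to 16KiB: it splits into a
  // 16KiB block followed by a 32KiB block, each aligned to its own size.
  split_into_aligned_blocks(0x100004000, 0xC000);
}
```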


@@ -194,16 +194,24 @@ pages, rather than zeroing them synchronously in this call
 ```c++
 template<bool committed>
-void* reserve(size_t size, size_t align);
-
-template<bool committed>
-void* reserve(size_t size) noexcept;
+void* reserve_aligned(size_t size) noexcept;
+
+std::pair<void*, size_t> reserve_at_least(size_t size) noexcept;
 ```
 Only one of these needs to be implemented, depending on whether the underlying
 system can provide strongly aligned memory regions.
-If the system guarantees only page alignment, implement the second and snmalloc
-will over-allocate and then trim the requested region.
+If the system guarantees only page alignment, implement the second. The Pal is
+free to over-allocate as it sees fit for the platform, and snmalloc will find
+suitably aligned blocks inside the returned region. `reserve_at_least` should
+not commit memory, as snmalloc commits only the ranges it requires from what is
+returned.
 If the system provides strong alignment, implement the first to return memory
-at the desired alignment.
+at the desired alignment. If providing the first, the `Pal` should also specify
+the minimum size of block it can provide:
+```
+static constexpr size_t minimum_alloc_size = ...;
+```
 Finally, you need to define a field to indicate the features that your PAL supports:
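For example, a minimal PAL that guarantees only page alignment could implement just `reserve_at_least` roughly as below. This is a sketch with a made-up name (`PALExample`), not one of the PALs touched by this commit, and it assumes a POSIX `mmap`:

```c++
#include <sys/mman.h>

#include <cstddef>
#include <utility>

// Hypothetical PAL that only guarantees page alignment, so it implements
// reserve_at_least and lets snmalloc carve aligned blocks out of the region.
class PALExample
{
public:
  static constexpr size_t page_size = 0x1000;

  // Reserve (but do not explicitly commit) at least `size` bytes; the actual
  // amount reserved is reported in the second element of the pair.
  std::pair<void*, size_t> reserve_at_least(size_t size) noexcept
  {
    // Round the request up to a whole number of pages.
    size_t request = (size + page_size - 1) & ~(page_size - 1);
    void* p = mmap(
      nullptr,
      request,
      PROT_READ | PROT_WRITE,
      MAP_PRIVATE | MAP_ANONYMOUS,
      -1,
      0);
    if (p == MAP_FAILED)
      return {nullptr, 0};
    return {p, request};
  }
};
```

snmalloc then calls `notify_using` only on the sub-ranges it actually needs, which is why the sketch does not eagerly commit anything beyond the anonymous mapping itself.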

src/mem/address_space.h (new file)

@@ -0,0 +1,243 @@
#include "../ds/address.h"
#include "../ds/flaglock.h"
#include "../pal/pal.h"

#include <array>

namespace snmalloc
{
  /**
   * Implements a power of two allocator, where all blocks are aligned to the
   * same power of two as their size. This is what snmalloc uses to get
   * alignment of very large sizeclasses.
   *
   * It cannot unreserve memory, so this does not require the
   * usual complexity of a buddy allocator.
   */
  template<typename Pal>
  class AddressSpaceManager : public Pal
  {
    /**
     * Stores the blocks of address space.
     *
     * The first level of the array is indexed by the power-of-two size.
     *
     * The first entry ranges[n][0] is just a pointer to an address range
     * of size 2^n.
     *
     * The second entry ranges[n][1] is a pointer to a linked list of blocks
     * of this size. The final block in the list is not committed, so we commit
     * on pop for this corner case.
     *
     * Invariants
     *   ranges[n][1] != nullptr => ranges[n][0] != nullptr
     *
     * bits::BITS is used for simplicity; we do not use sizes below the pointer
     * size, and very large entries are unlikely to be supported by the
     * platform.
     */
    std::array<std::array<void*, 2>, bits::BITS> ranges = {};

    /**
     * This is infrequently used code; a spin lock simplifies the code
     * considerably, and should never be on the fast path.
     */
    std::atomic_flag spin_lock = ATOMIC_FLAG_INIT;

    /**
     * Checks a block satisfies its invariant.
     */
    inline void check_block(void* base, size_t align_bits)
    {
      SNMALLOC_ASSERT(
        base == pointer_align_up(base, bits::one_at_bit(align_bits)));
      // All blocks need to be bigger than a pointer.
      SNMALLOC_ASSERT(bits::one_at_bit(align_bits) >= sizeof(void*));
      UNUSED(base);
      UNUSED(align_bits);
    }

    /**
     * Adds a block to `ranges`.
     */
    void add_block(size_t align_bits, void* base)
    {
      check_block(base, align_bits);
      SNMALLOC_ASSERT(align_bits < 64);
      if (ranges[align_bits][0] == nullptr)
      {
        // Prefer first slot if available.
        ranges[align_bits][0] = base;
        return;
      }

      if (ranges[align_bits][1] != nullptr)
      {
        // Add to linked list.
        commit_block(base, sizeof(void*));
        *reinterpret_cast<void**>(base) = ranges[align_bits][1];
        check_block(ranges[align_bits][1], align_bits);
      }

      // Update head of list
      ranges[align_bits][1] = base;
      check_block(ranges[align_bits][1], align_bits);
    }

    /**
     * Find a block of the correct size. May split larger blocks
     * to satisfy this request.
     */
    void* remove_block(size_t align_bits)
    {
      auto first = ranges[align_bits][0];
      if (first == nullptr)
      {
        if (align_bits == (bits::BITS - 1))
        {
          // Out of memory
          return nullptr;
        }

        // Look for larger block and split up recursively
        void* bigger = remove_block(align_bits + 1);
        if (bigger != nullptr)
        {
          void* left_over =
            pointer_offset(bigger, bits::one_at_bit(align_bits));
          ranges[align_bits][0] = left_over;
          check_block(left_over, align_bits);
        }
        check_block(bigger, align_bits + 1);
        return bigger;
      }

      auto second = ranges[align_bits][1];
      if (second != nullptr)
      {
        commit_block(second, sizeof(void*));
        auto next = *reinterpret_cast<void**>(second);
        ranges[align_bits][1] = next;
        // Zero memory. Client assumes memory contains only zeros.
        *reinterpret_cast<void**>(second) = nullptr;
        check_block(second, align_bits);
        check_block(next, align_bits);
        return second;
      }

      check_block(first, align_bits);
      ranges[align_bits][0] = nullptr;
      return first;
    }

    /**
     * Add a range of memory to the address space.
     * Divides blocks into power of two sizes with natural alignment.
     */
    void add_range(void* base, size_t length)
    {
      // Find the minimum set of maximally aligned blocks in this range.
      // Each block's alignment and size are equal.
      while (length >= sizeof(void*))
      {
        size_t base_align_bits = bits::ctz(address_cast(base));
        size_t length_align_bits = (bits::BITS - 1) - bits::clz(length);
        size_t align_bits = bits::min(base_align_bits, length_align_bits);
        size_t align = bits::one_at_bit(align_bits);

        check_block(base, align_bits);
        add_block(align_bits, base);

        base = pointer_offset(base, align);
        length -= align;
      }
    }

    /**
     * Commit a block of memory.
     */
    void commit_block(void* base, size_t size)
    {
      // Rounding required for sub-page allocations.
      auto page_start = pointer_align_down<OS_PAGE_SIZE, char>(base);
      auto page_end =
        pointer_align_up<OS_PAGE_SIZE, char>(pointer_offset(base, size));
      Pal::template notify_using<NoZero>(
        page_start, static_cast<size_t>(page_end - page_start));
    }

  public:
    /**
     * Returns a pointer to a block of memory of the supplied size.
     * The block will be committed, if specified by the template parameter.
     * The returned block is guaranteed to be aligned to the size.
     *
     * Only request 2^n sizes, and not less than a pointer.
     */
    template<bool committed>
    void* reserve(size_t size)
    {
      SNMALLOC_ASSERT(bits::next_pow2(size) == size);
      SNMALLOC_ASSERT(size >= sizeof(void*));

      if constexpr (pal_supports<AlignedAllocation, Pal>)
      {
        if (size >= Pal::minimum_alloc_size)
          return static_cast<Pal*>(this)->template reserve_aligned<committed>(
            size);
      }

      void* res;
      {
        FlagLock lock(spin_lock);
        res = remove_block(bits::next_pow2_bits(size));
        if (res == nullptr)
        {
          // Allocation failed; ask the OS for more memory.
          void* block;
          size_t block_size;
          if constexpr (pal_supports<AlignedAllocation, Pal>)
          {
            block_size = Pal::minimum_alloc_size;
            block = static_cast<Pal*>(this)->template reserve_aligned<false>(
              block_size);
          }
          else
          {
            // Need at least 2 times the space to guarantee alignment.
            // Hold the lock here, as a race could cause additional requests to
            // the Pal, and this could lead to spurious OOM. This is
            // particularly bad if the Pal gives all the memory on first call.
            auto block_and_size =
              static_cast<Pal*>(this)->reserve_at_least(size * 2);
            block = block_and_size.first;
            block_size = block_and_size.second;

            // Ensure block is pointer aligned.
            if (
              pointer_align_up(block, sizeof(void*)) != block ||
              bits::align_up(block_size, sizeof(void*)) > block_size)
            {
              auto diff =
                pointer_diff(block, pointer_align_up(block, sizeof(void*)));
              block_size = block_size - diff;
              block_size = bits::align_down(block_size, sizeof(void*));
            }
          }
          if (block == nullptr)
          {
            return nullptr;
          }
          add_range(block, block_size);

          // Still holding the lock, so guaranteed to succeed.
          res = remove_block(bits::next_pow2_bits(size));
        }
      }

      // Don't need lock while committing pages.
      if constexpr (committed)
        commit_block(res, size);

      return res;
    }
  };
} // namespace snmalloc
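As a usage sketch of the class above (not from this commit; `MyPal` stands in for any PAL satisfying the porting interface, and the include path may differ depending on how snmalloc is consumed):

```c++
#include "mem/address_space.h"

// MyPal is a placeholder for a concrete PAL such as the POSIX or Windows one.
snmalloc::AddressSpaceManager<MyPal> address_space;

void* get_aligned_chunk()
{
  // Sizes must be powers of two and at least the size of a pointer.
  constexpr size_t size = 16 * 1024 * 1024;

  // Request a committed, naturally aligned 16MiB block.
  void* chunk = address_space.reserve<true>(size);

  // The returned block is aligned to its own size.
  SNMALLOC_ASSERT((snmalloc::address_cast(chunk) & (size - 1)) == 0);
  return chunk;
}
```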


@@ -4,6 +4,7 @@
 #include "../ds/helpers.h"
 #include "../ds/mpmcstack.h"
 #include "../pal/pal.h"
+#include "address_space.h"
 #include "allocstats.h"
 #include "baseslab.h"
 #include "sizeclass.h"

@@ -58,27 +59,17 @@ namespace snmalloc
   template<class PAL>
   class MemoryProviderStateMixin : public PalNotificationObject, public PAL
   {
-    /**
-     * Flag to protect the bump allocator
-     */
-    std::atomic_flag lock = ATOMIC_FLAG_INIT;
-
-    /**
-     * Pointer to block being bump allocated
-     */
-    void* bump = nullptr;
-
-    /**
-     * Space remaining in this block being bump allocated
-     */
-    size_t remaining = 0;
-
     /**
      * Simple flag for checking if another instance of lazy-decommit is
      * running
      */
     std::atomic_flag lazy_decommit_guard = {};

+    /**
+     * Manages address space for this memory provider.
+     */
+    AddressSpaceManager<PAL> address_space = {};
+
   public:
     /**
      * Stack of large allocations that have been returned for reuse.

@@ -91,12 +82,15 @@ namespace snmalloc
     static MemoryProviderStateMixin<PAL>* make() noexcept
     {
       // Temporary stack-based storage to start the allocator in.
-      MemoryProviderStateMixin<PAL> local;
+      MemoryProviderStateMixin<PAL> local{};

       // Allocate permanent storage for the allocator using the temporary allocator
       MemoryProviderStateMixin<PAL>* allocated =
         local.alloc_chunk<MemoryProviderStateMixin<PAL>, 1>();

+      if (allocated == nullptr)
+        error("Failed to initialise system!");
+
 #ifdef GCC_VERSION_EIGHT_PLUS
 # pragma GCC diagnostic push
 # pragma GCC diagnostic ignored "-Wclass-memaccess"

@@ -105,7 +99,10 @@ namespace snmalloc
       // memcpy is safe as this is entirely single threaded: the move
       // constructors were removed as unsafe to move std::atomic in a
       // concurrent setting.
-      memcpy(allocated, &local, sizeof(MemoryProviderStateMixin<PAL>));
+      ::memcpy(
+        &(allocated->address_space),
+        &(local.address_space),
+        sizeof(AddressSpaceManager<PAL>));
 #ifdef GCC_VERSION_EIGHT_PLUS
 # pragma GCC diagnostic pop
 #endif

@@ -121,22 +118,6 @@ namespace snmalloc
     }

   private:
-    void new_block()
-    {
-      // Reserve the smallest large_class which is SUPERSLAB_SIZE
-      void* r = reserve<false>(0);
-
-      if (r == nullptr)
-        Pal::error(
-          "Unrecoverable internal error: \
-          failed to allocate internal data structure.");
-
-      PAL::template notify_using<NoZero>(r, OS_PAGE_SIZE);
-
-      bump = r;
-      remaining = SUPERSLAB_SIZE;
-    }
-
     SNMALLOC_SLOW_PATH void lazy_decommit()
     {
       // If another thread is trying to do lazy decommit, let it continue. If

@@ -183,24 +164,6 @@ namespace snmalloc
       lazy_decommit_guard.clear();
     }

-    void push_space(void* p, size_t large_class)
-    {
-      // All fresh pages so can use "NoZero"
-      if (large_class > 0)
-        PAL::template notify_using<NoZero>(p, OS_PAGE_SIZE);
-      else
-      {
-        if (decommit_strategy == DecommitSuperLazy)
-        {
-          PAL::template notify_using<NoZero>(p, OS_PAGE_SIZE);
-          p = new (p) Decommittedslab();
-        }
-        else
-          PAL::template notify_using<NoZero>(p, SUPERSLAB_SIZE);
-      }
-      large_stack[large_class].push(reinterpret_cast<Largeslab*>(p));
-    }
-
     /***
      * Method for callback object to perform lazy decommit.
      */

@@ -221,45 +184,10 @@ namespace snmalloc
     {
       // Cache line align
       size_t size = bits::align_up(sizeof(T), 64);
-
-      void* p;
-      {
-        FlagLock f(lock);
-        if constexpr (alignment != 0)
-        {
-          char* aligned_bump = pointer_align_up<alignment, char>(bump);
-          size_t bump_delta = pointer_diff(bump, aligned_bump);
-
-          if (bump_delta > remaining)
-          {
-            new_block();
-          }
-          else
-          {
-            remaining -= bump_delta;
-            bump = aligned_bump;
-          }
-        }
-
-        if (remaining < size)
-        {
-          new_block();
-        }
-
-        p = bump;
-        bump = pointer_offset(bump, size);
-        remaining -= size;
-      }
-
-      auto page_start = pointer_align_down<OS_PAGE_SIZE, char>(p);
-      auto page_end =
-        pointer_align_up<OS_PAGE_SIZE, char>(pointer_offset(p, size));
-      PAL::template notify_using<NoZero>(
-        page_start, static_cast<size_t>(page_end - page_start));
-
+      size = bits::max(size, alignment);
+
+      void* p = address_space.template reserve<true>(bits::next_pow2(size));
+      if (p == nullptr)
+        return nullptr;
+
       return new (p) T(std::forward<Args...>(args)...);
     }

@@ -267,66 +195,8 @@ namespace snmalloc
     void* reserve(size_t large_class) noexcept
     {
       size_t size = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
-      size_t align = size;
-
-      if constexpr (pal_supports<AlignedAllocation, PAL>)
-      {
-        return PAL::template reserve<committed>(size, align);
-      }
-      else
-      {
-        // Reserve 4 times the amount, and put aligned leftovers into the
-        // large_stack
-        size_t request = bits::max(size * 4, SUPERSLAB_SIZE * 8);
-        void* p = PAL::template reserve<false>(request);
-
-        if (p == nullptr)
-          return nullptr;
-
-        void* start = pointer_align_up(p, align);
-        void* p1 = pointer_offset(p, request);
-        void* end = pointer_offset(start, size);
-
-        for (; end < pointer_align_down(p1, align);
-             end = pointer_offset(end, size))
-        {
-          push_space(end, large_class);
-        }
-
-        // Put offcuts before alignment into the large stack
-        void* offcut_end = start;
-        void* offcut_start;
-        for (size_t i = large_class; i > 0;)
-        {
-          i--;
-          size_t offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i;
-          offcut_start = pointer_align_up(p, offcut_align);
-          if (offcut_start != offcut_end)
-          {
-            push_space(offcut_start, i);
-            offcut_end = offcut_start;
-          }
-        }
-
-        // Put offcuts after returned block into the large stack
-        offcut_start = end;
-        for (size_t i = large_class; i > 0;)
-        {
-          i--;
-          auto offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i;
-          offcut_end = pointer_align_down(p1, offcut_align);
-          if (offcut_start != offcut_end)
-          {
-            push_space(offcut_start, i);
-            offcut_start = offcut_end;
-          }
-        }
-
-        if (committed)
-          PAL::template notify_using<NoZero>(start, size);
-
-        return start;
-      }
+      return address_space.template reserve<committed>(size);
     }
   };
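To make the delegation above concrete: the large-class index maps to a power-of-two size, and each such request now comes back from `AddressSpaceManager::reserve` aligned to that size. A small worked sketch (the `SUPERSLAB_BITS` value of 24 here is assumed purely for illustration):

```c++
#include <cstddef>
#include <cstdio>

constexpr size_t SUPERSLAB_BITS = 24; // assumed value, for the example only

int main()
{
  for (size_t large_class = 0; large_class < 3; large_class++)
  {
    // Same mapping as in reserve(large_class) above; the AddressSpaceManager
    // returns each block aligned to its own size.
    size_t size = (size_t(1) << SUPERSLAB_BITS) << large_class;
    printf("large_class %zu -> block size %zu MiB\n", large_class, size >> 20);
  }
}
```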


@@ -4,6 +4,7 @@
 # include "pal_bsd.h"
 # include <mach/vm_statistics.h>
+# include <utility>

 namespace snmalloc
 {

@@ -55,12 +56,17 @@
      *
      * See comment below.
      */
-    template<bool committed>
-    void* reserve(size_t size)
+    std::pair<void*, size_t> reserve_at_least(size_t size)
     {
+      // Magic number for over-allocating chosen by the Pal.
+      // These should be further refined based on experiments.
+      constexpr size_t min_size =
+        bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28);
+      auto size_request = bits::max(size, min_size);
+
       void* p = mmap(
         nullptr,
-        size,
+        size_request,
         PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANONYMOUS,
         pal_anon_id,

@@ -69,7 +75,7 @@
       if (p == MAP_FAILED)
         error("Out of memory");

-      return p;
+      return {p, size_request};
     }

   private:


@@ -23,18 +23,19 @@ namespace snmalloc
     static constexpr uint64_t pal_features =
       AlignedAllocation | PALBSD<OS>::pal_features;

+    static constexpr size_t minimum_alloc_size = 4096;
+
     /**
      * Reserve memory at a specific alignment.
      */
     template<bool committed>
-    void* reserve(size_t size, size_t align) noexcept
+    void* reserve_aligned(size_t size) noexcept
     {
       // Alignment must be a power of 2.
-      SNMALLOC_ASSERT(align == bits::next_pow2(align));
+      SNMALLOC_ASSERT(size == bits::next_pow2(size));
+      SNMALLOC_ASSERT(size >= minimum_alloc_size);

-      align = bits::max<size_t>(4096, align);
-      size_t log2align = bits::next_pow2_bits(align);
+      size_t log2align = bits::next_pow2_bits(size);

       void* p = mmap(
         nullptr,


@@ -60,8 +60,12 @@ namespace snmalloc
     }

     template<bool committed>
-    void* reserve(size_t size, size_t align)
+    void* reserve_aligned(size_t size) noexcept
     {
+      SNMALLOC_ASSERT(size == bits::next_pow2(size));
+      SNMALLOC_ASSERT(size >= minimum_alloc_size);
+      size_t align = size;
+
       vm_offset_t addr;
       if (vmem_xalloc(
             kernel_arena,


@@ -13,98 +13,31 @@ namespace snmalloc
 {
   class PALOpenEnclave
   {
-    /**
-     * Implements a power of two allocator, where all blocks are aligned to the
-     * same power of two as their size. This is what snmalloc uses to get
-     * alignment of very large sizeclasses.
-     *
-     * Pals are not required to unreserve memory, so this does not require the
-     * usual complexity of a buddy allocator.
-     */
-    // There are a maximum of two blocks for any size/align in a range.
-    // One before the point of maximum alignment, and one after.
-    static inline std::array<std::array<void*, 2>, bits::BITS> ranges;
+    /// Base of OE heap
+    static inline void* heap_base = nullptr;
+
+    /// Size of OE heap
+    static inline size_t heap_size;

     // This is infrequently used code; a spin lock simplifies the code
     // considerably, and should never be on the fast path.
     static inline std::atomic_flag spin_lock;

-    static void add_block(size_t align_bits, void* base)
-    {
-      if (ranges[align_bits][0] == nullptr)
-      {
-        ranges[align_bits][0] = base;
-        return;
-      }
-
-      if (ranges[align_bits][1] != nullptr)
-        error("Critical assumption violated!");
-
-      ranges[align_bits][1] = base;
-    }
-
-    static void* remove_block(size_t align_bits)
-    {
-      auto first = ranges[align_bits][0];
-      if (first == nullptr)
-      {
-        if (align_bits < (bits::BITS - 1))
-        {
-          // Look for larger block and split up recursively
-          void* bigger = remove_block(align_bits + 1);
-          if (bigger == nullptr)
-          {
-            // Out of memory.
-            return bigger;
-          }
-          void* left_over =
-            pointer_offset(bigger, bits::one_at_bit(align_bits));
-          ranges[align_bits][0] = left_over;
-          return bigger;
-        }
-        // Out of memory
-        return nullptr;
-      }
-
-      auto second = ranges[align_bits][1];
-      if (second != nullptr)
-      {
-        ranges[align_bits][1] = nullptr;
-        return second;
-      }
-
-      ranges[align_bits][0] = nullptr;
-      return first;
-    }
-
   public:
     /**
      * This will be called by oe_allocator_init to set up enclave heap bounds.
      */
     static void setup_initial_range(void* base, void* end)
     {
-      // Find the minimum set of maximally aligned blocks in this range.
-      // Each block's alignment and size are equal.
-      size_t length = pointer_diff(base, end);
-      while (length != 0)
-      {
-        size_t base_align_bits = bits::ctz(address_cast(base));
-        size_t length_align_bits = (bits::BITS - 1) - bits::clz(length);
-        size_t align_bits = bits::min(base_align_bits, length_align_bits);
-        size_t align = bits::one_at_bit(align_bits);
-
-        add_block(align_bits, base);
-
-        base = pointer_offset(base, align);
-        length -= align;
-      }
+      heap_size = pointer_diff(base, end);
+      heap_base = base;
     }

     /**
      * Bitmap of PalFeatures flags indicating the optional features that this
      * PAL supports.
      */
-    static constexpr uint64_t pal_features = AlignedAllocation;
+    static constexpr uint64_t pal_features = 0;

     static constexpr size_t page_size = 0x1000;

@@ -114,19 +47,18 @@
       oe_abort();
     }

-    template<bool committed>
-    static void* reserve(size_t size, size_t align) noexcept
+    static std::pair<void*, size_t>
+    reserve_at_least(size_t request_size) noexcept
     {
-      // The following are all true from the current way snmalloc uses the PAL.
-      // The implementation here is depending on them.
-      SNMALLOC_ASSERT(size == bits::next_pow2(size));
-      SNMALLOC_ASSERT(align == bits::next_pow2(align));
-      if (size != align)
-        error("Critical assumption violated!");
+      // The first call returns the entire address space;
+      // subsequent calls return {nullptr, 0}.
       FlagLock lock(spin_lock);
-      size_t align_bits = bits::next_pow2_bits(align);
-      return remove_block(align_bits);
+      if (request_size > heap_size)
+        return {nullptr, 0};
+
+      auto result = std::make_pair(heap_base, heap_size);
+      heap_size = 0;
+      return result;
     }

     template<bool page_aligned = false>


@@ -9,6 +9,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <utility>

 extern "C" int puts(const char* str);

@@ -147,12 +148,16 @@ namespace snmalloc
      * POSIX does not define a portable interface for specifying alignment
      * greater than a page.
      */
-    template<bool committed>
-    void* reserve(size_t size) noexcept
+    std::pair<void*, size_t> reserve_at_least(size_t size) noexcept
     {
+      // Magic number for over-allocating chosen by the Pal.
+      // These should be further refined based on experiments.
+      constexpr size_t min_size =
+        bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28);
+      auto size_request = bits::max(size, min_size);
+
       void* p = mmap(
         nullptr,
-        size,
+        size_request,
         PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANONYMOUS,
         -1,

@@ -161,7 +166,7 @@
       if (p == MAP_FAILED)
         OS::error("Out of memory");

-      return p;
+      return {p, size_request};
     }
   };
 } // namespace snmalloc


@@ -77,11 +77,13 @@
      * PAL supports. This PAL supports low-memory notifications.
      */
     static constexpr uint64_t pal_features = LowMemoryNotification
-# if defined(PLATFORM_HAS_VIRTUALALLOC2)
+# if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING)
       | AlignedAllocation
 # endif
       ;

+    static constexpr size_t minimum_alloc_size = 0x10000;
+
     static constexpr size_t page_size = 0x1000;

     /**

@@ -157,13 +159,16 @@ namespace snmalloc
       static size_t bump_ptr = (size_t)0x4000'0000'0000;
       return bump_ptr;
     }
-    template<bool committed>
-    void* reserve(size_t size) noexcept
-    {
-      DWORD flags = MEM_RESERVE;
-
-      if (committed)
-        flags |= MEM_COMMIT;
+
+    std::pair<void*, size_t> reserve_at_least(size_t size) noexcept
+    {
+      // Magic number for over-allocating chosen by the Pal.
+      // These should be further refined based on experiments.
+      constexpr size_t min_size =
+        bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28);
+      auto size_request = bits::max(size, min_size);
+
+      DWORD flags = MEM_RESERVE;

       size_t retries = 1000;
       void* p;

@@ -171,34 +176,30 @@
       do
       {
         p = VirtualAlloc(
-          (void*)systematic_bump_ptr(), size, flags, PAGE_READWRITE);
+          (void*)systematic_bump_ptr(), size_request, flags, PAGE_READWRITE);

-        systematic_bump_ptr() += size;
+        systematic_bump_ptr() += size_request;
         retries--;
       } while (p == nullptr && retries > 0);

-      return p;
+      return {p, size_request};
     }
 # elif defined(PLATFORM_HAS_VIRTUALALLOC2)
     template<bool committed>
-    void* reserve(size_t size, size_t align) noexcept
+    void* reserve_aligned(size_t size) noexcept
     {
+      SNMALLOC_ASSERT(size == bits::next_pow2(size));
+      SNMALLOC_ASSERT(size >= minimum_alloc_size);
+
       DWORD flags = MEM_RESERVE;

       if (committed)
         flags |= MEM_COMMIT;

-      // Windows doesn't let you request memory less than 64KB aligned. Most
-      // operating systems will simply give you something more aligned than you
-      // ask for, but Windows complains about invalid parameters.
-      const size_t min_align = 64 * 1024;
-      if (align < min_align)
-        align = min_align;
-
       // If we're on Windows 10 or newer, we can use the VirtualAlloc2
       // function. The FromApp variant is usable by UWP applications and
       // cannot allocate executable memory.
-      MEM_ADDRESS_REQUIREMENTS addressReqs = {NULL, NULL, align};
+      MEM_ADDRESS_REQUIREMENTS addressReqs = {NULL, NULL, size};

       MEM_EXTENDED_PARAMETER param = {
         {MemExtendedParameterAddressRequirements, 0}, {0}};

@@ -215,20 +216,21 @@
       return ret;
     }
 # else
-    template<bool committed>
-    void* reserve(size_t size) noexcept
+    std::pair<void*, size_t> reserve_at_least(size_t size) noexcept
     {
+      // Magic number for over-allocating chosen by the Pal.
+      // These should be further refined based on experiments.
+      constexpr size_t min_size =
+        bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28);
+      auto size_request = bits::max(size, min_size);
+
       DWORD flags = MEM_RESERVE;
+      void* ret = VirtualAlloc(nullptr, size_request, flags, PAGE_READWRITE);

-      if (committed)
-        flags |= MEM_COMMIT;
-
-      void* ret = VirtualAlloc(nullptr, size, flags, PAGE_READWRITE);
       if (ret == nullptr)
       {
         error("Failed to allocate memory\n");
       }
-      return ret;
+      return std::pair(ret, size_request);
     }
 # endif
   };