From 821620133d07c8ea561c63277b68f6ee5e21adf1 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 23 Mar 2022 16:08:53 +0000 Subject: [PATCH] Remove ChunkAllocator --- src/backend/backend.h | 9 +- src/backend/chunkallocator.h | 370 ------------------- src/backend/fixedglobalconfig.h | 1 - src/backend/globalconfig.h | 1 - src/ds/spmcstack.h | 72 ---- src/mem/corealloc.h | 21 +- src/mem/pool.h | 11 +- src/test/func/domestication/domestication.cc | 8 - 8 files changed, 18 insertions(+), 475 deletions(-) delete mode 100644 src/backend/chunkallocator.h delete mode 100644 src/ds/spmcstack.h diff --git a/src/backend/backend.h b/src/backend/backend.h index 3145a1c1..4c169d6a 100644 --- a/src/backend/backend.h +++ b/src/backend/backend.h @@ -1,7 +1,6 @@ #pragma once #include "../mem/allocconfig.h" #include "../pal/pal.h" -#include "chunkallocator.h" #include "commitrange.h" #include "commonconfig.h" #include "empty_range.h" @@ -301,10 +300,10 @@ namespace snmalloc return {p, meta}; } - static void dealloc_chunk( - LocalState& local_state, ChunkRecord* chunk_record, size_t size) + static void + dealloc_chunk(LocalState& local_state, MetaCommon& meta_common, size_t size) { - auto chunk = chunk_record->meta_common.chunk; + auto chunk = meta_common.chunk; /* * The backend takes possession of these chunks now, by disassociating @@ -316,7 +315,7 @@ namespace snmalloc Pagemap::set_metaentry(address_cast(chunk), size, t); local_state.get_meta_range()->dealloc_range( - capptr::Chunk(chunk_record), PAGEMAP_METADATA_STRUCT_SIZE); + capptr::Chunk(&meta_common), PAGEMAP_METADATA_STRUCT_SIZE); local_state.object_range->dealloc_range(chunk, size); } diff --git a/src/backend/chunkallocator.h b/src/backend/chunkallocator.h deleted file mode 100644 index 80d1be5d..00000000 --- a/src/backend/chunkallocator.h +++ /dev/null @@ -1,370 +0,0 @@ -#pragma once - -/*** - * WARNING: This file is not currently in use. The functionality has not - * be transistioned to the new backend. It does not seem to be required - * but further testing is required before we delete it. - */ - -#include "../backend/backend_concept.h" -#include "../backend/metatypes.h" -#include "../ds/mpmcstack.h" -#include "../ds/spmcstack.h" -#include "../mem/metaslab.h" -#include "../mem/sizeclasstable.h" -#include "../pal/pal_ds.h" - -#ifdef SNMALLOC_TRACING -# include -#endif - -#include - -namespace snmalloc -{ - /** - * Used to store slabs in the unused sizes. - */ - struct ChunkRecord - { - MetaCommon meta_common; - std::atomic next; - }; -#if defined(USE_METADATA_CONCEPT) - static_assert(ConceptMetadataStruct); -#endif - - /** - * How many slab sizes that can be provided. - */ - constexpr size_t NUM_SLAB_SIZES = Pal::address_bits - MIN_CHUNK_BITS; - - /** - * Number of free stacks per chunk size that each allocator will use. - * For performance ideally a power of 2. We will return to the central - * pool anything that has not be used in the last NUM_EPOCHS - 1, where - * each epoch is separated by DecayMemoryTimerObject::PERIOD. - * I.e. if period is 500ms and num of epochs is 4, then we will return to - * the central pool anything not used for the last 1500-2000ms. - */ - constexpr size_t NUM_EPOCHS = 4; - static_assert(bits::is_pow2(NUM_EPOCHS), "Code assumes power of two."); - - class ChunkAllocatorLocalState - { - friend class ChunkAllocator; - - /** - * Stack of slabs that have been returned for reuse. - */ - ModArray>> - chunk_stack; - - /** - * Used for list of all ChunkAllocatorLocalStates. 
- */ - std::atomic next{nullptr}; - }; - - /** - * This is the global state required for the chunk allocator. - * It must be provided as a part of the shared state handle - * to the chunk allocator. - */ - class ChunkAllocatorState - { - friend class ChunkAllocator; - /** - * Stack of slabs that have been returned for reuse. - */ - ModArray> - decommitted_chunk_stack; - - /** - * Which is the current epoch to place dealloced chunks, and the - * first place we look for allocating chunks. - */ - alignas(CACHELINE_SIZE) std::atomic epoch{0}; - - /** - * All memory issued by this address space manager - */ - std::atomic peak_memory_usage_{0}; - - std::atomic memory_in_stacks{0}; - - std::atomic all_local{nullptr}; - - // Flag to ensure one-shot registration with the PAL for notifications. - std::atomic_flag register_decay{}; - - public: - size_t unused_memory() - { - return memory_in_stacks; - } - - size_t peak_memory_usage() - { - return peak_memory_usage_; - } - - void add_peak_memory_usage(size_t size) - { - peak_memory_usage_ += size; -#ifdef SNMALLOC_TRACING - std::cout << "peak_memory_usage_: " << peak_memory_usage_ << std::endl; -#endif - } - }; - - class ChunkAllocator - { - template - class DecayMemoryTimerObject : public PalTimerObject - { - ChunkAllocatorState* state; - - /*** - * Method for callback object to perform lazy decommit. - */ - static void process(PalTimerObject* p) - { - // Unsafe downcast here. Don't want vtable and RTTI. - auto self = reinterpret_cast(p); - ChunkAllocator::handle_decay_tick(self->state); - } - - // Specify that we notify the ChunkAllocator every 500ms. - static constexpr size_t PERIOD = 500; - - public: - DecayMemoryTimerObject(ChunkAllocatorState* state) - : PalTimerObject(&process, PERIOD), state(state) - {} - }; - - template - static void handle_decay_tick(ChunkAllocatorState* state) - { - auto new_epoch = (state->epoch + 1) % NUM_EPOCHS; - // Flush old index for all threads. - ChunkAllocatorLocalState* curr = state->all_local; - while (curr != nullptr) - { - for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++) - { - auto& old_stack = curr->chunk_stack[sc][new_epoch]; - ChunkRecord* record = old_stack.pop_all(); - while (record != nullptr) - { - auto next = record->next.load(); - - // Disable pages for this - Pal::notify_not_using( - record->meta_common.chunk.unsafe_ptr(), - slab_sizeclass_to_size(sc)); - - // Add to global state - state->decommitted_chunk_stack[sc].push(record); - - record = next; - } - } - curr = curr->next; - } - - // Advance current index - state->epoch = new_epoch; - } - - public: - template - static std::pair, Metaslab*> alloc_chunk( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state, - chunksizeclass_t slab_sizeclass, - size_t slab_size, - uintptr_t ras) - { - using PAL = typename SharedStateHandle::Pal; - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - if (slab_sizeclass >= NUM_SLAB_SIZES) - { - // Your address space is not big enough for this allocation! - errno = ENOMEM; - return {nullptr, nullptr}; - } - - ChunkRecord* chunk_record = nullptr; - if constexpr (pal_supports) - { - // Try local cache of chunks first - for (size_t e = 0; e < NUM_EPOCHS && chunk_record == nullptr; e++) - { - chunk_record = - chunk_alloc_local_state - .chunk_stack[slab_sizeclass][(state.epoch - e) % NUM_EPOCHS] - .pop(); - } - } - - // Try global cache. 
- if (chunk_record == nullptr) - { - chunk_record = state.decommitted_chunk_stack[slab_sizeclass].pop(); - if (chunk_record != nullptr) - { - PAL::template notify_using( - chunk_record->meta_common.chunk.unsafe_ptr(), slab_size); - } - } - - if (chunk_record != nullptr) - { - auto slab = chunk_record->meta_common.chunk; - state.memory_in_stacks -= slab_size; - auto meta = reinterpret_cast(chunk_record); -#ifdef SNMALLOC_TRACING - std::cout << "Reuse slab:" << slab.unsafe_ptr() << " slab_sizeclass " - << slab_sizeclass << " size " << slab_size - << " memory in stacks " << state.memory_in_stacks - << std::endl; -#endif - MetaEntry entry{&meta->meta_common, ras}; - SharedStateHandle::Pagemap::set_metaentry( - address_cast(slab), slab_size, entry); - return {slab, meta}; - } - - // Allocate a fresh slab as there are no available ones. - // First create meta-data - auto [slab, meta] = - SharedStateHandle::alloc_chunk(&local_state, slab_size, ras); -#ifdef SNMALLOC_TRACING - std::cout << "Create slab:" << slab.unsafe_ptr() << " slab_sizeclass " - << slab_sizeclass << " size " << slab_size << std::endl; -#endif - - state.add_peak_memory_usage(slab_size); - state.add_peak_memory_usage(PAGEMAP_METADATA_STRUCT_SIZE); - // TODO handle bounded versus lazy pagemaps in stats - state.add_peak_memory_usage( - (slab_size / MIN_CHUNK_SIZE) * sizeof(MetaEntry)); - - return {slab, meta}; - } - - template - SNMALLOC_SLOW_PATH static void dealloc( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state, - ChunkRecord* p, - size_t slab_sizeclass) - { - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - if constexpr (pal_supports) - { - // If we have a time source use decay based local cache. -#ifdef SNMALLOC_TRACING - std::cout << "Return slab:" << p->meta_common.chunk.unsafe_ptr() - << " slab_sizeclass " << slab_sizeclass << " size " - << slab_sizeclass_to_size(slab_sizeclass) - << " memory in stacks " << state.memory_in_stacks - << std::endl; -#endif - chunk_alloc_local_state.chunk_stack[slab_sizeclass][state.epoch].push( - p); - } - else - { - // No time source share immediately with global state. - // Disable pages for this chunk. - SharedStateHandle::Pal::notify_not_using( - p->meta_common.chunk.unsafe_ptr(), - slab_sizeclass_to_size(slab_sizeclass)); - - // Add to global state - state.decommitted_chunk_stack[slab_sizeclass].push(p); - } - - state.memory_in_stacks += slab_sizeclass_to_size(slab_sizeclass); - } - - /** - * Provide a block of meta-data with size and align. - * - * Backend allocator may use guard pages and separate area of - * address space to protect this from corruption. - */ - template< - typename U, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle, - typename... Args> - static U* alloc_meta_data( - typename SharedStateHandle::LocalState* local_state, Args&&... args) - { - // Cache line align - size_t size = bits::align_up(sizeof(U), 64); - - capptr::Chunk p = - SharedStateHandle::template alloc_meta_data(local_state, size); - - if (p == nullptr) - { - errno = ENOMEM; - return nullptr; - } - - return new (p.unsafe_ptr()) U(std::forward(args)...); - } - - template - static void register_local_state( - typename SharedStateHandle::LocalState& local_state, - ChunkAllocatorLocalState& chunk_alloc_local_state) - { - if constexpr (pal_supports) - { - ChunkAllocatorState& state = - SharedStateHandle::get_chunk_allocator_state(&local_state); - - // Register with the Pal to receive notifications. 
- if (!state.register_decay.test_and_set()) - { - auto timer = alloc_meta_data< - DecayMemoryTimerObject, - SharedStateHandle>(&local_state, &state); - if (timer != nullptr) - { - SharedStateHandle::Pal::register_timer(timer); - } - else - { - // We failed to register the notification. - // This is not catarophic, but if we can't allocate this - // state something else will fail shortly. - state.register_decay.clear(); - } - } - - // Add to the list of local states. - auto* head = state.all_local.load(); - do - { - chunk_alloc_local_state.next = head; - } while (!state.all_local.compare_exchange_strong( - head, &chunk_alloc_local_state)); - } - else - { - UNUSED(local_state); - UNUSED(chunk_alloc_local_state); - } - } - }; -} // namespace snmalloc diff --git a/src/backend/fixedglobalconfig.h b/src/backend/fixedglobalconfig.h index 9bb33ae5..5b9cbf5d 100644 --- a/src/backend/fixedglobalconfig.h +++ b/src/backend/fixedglobalconfig.h @@ -1,7 +1,6 @@ #pragma once #include "../backend/backend.h" -#include "../backend/chunkallocator.h" #include "../mem/corealloc.h" #include "../mem/pool.h" #include "commonconfig.h" diff --git a/src/backend/globalconfig.h b/src/backend/globalconfig.h index 3b468569..a496aa3e 100644 --- a/src/backend/globalconfig.h +++ b/src/backend/globalconfig.h @@ -1,7 +1,6 @@ #pragma once #include "../backend/backend.h" -#include "../backend/chunkallocator.h" #include "../mem/corealloc.h" #include "../mem/pool.h" #include "commonconfig.h" diff --git a/src/ds/spmcstack.h b/src/ds/spmcstack.h deleted file mode 100644 index 7c6ea70e..00000000 --- a/src/ds/spmcstack.h +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include "aba.h" -#include "ptrwrap.h" - -namespace snmalloc -{ - /** - * Concurrent Stack - * - * This stack supports the following clients - * (push|pop)* || pop_all* || ... || pop_all* - * - * That is a single thread that can do push and pop, and other threads - * that do pop_all. pop_all if it returns a value, returns all of the - * stack, however, it may return nullptr if it races with either a push - * or a pop. - * - * The primary use case is single-threaded access, where other threads - * can attempt to steal all the values. - */ - template - class SPMCStack - { - private: - alignas(CACHELINE_SIZE) std::atomic stack{}; - - public: - constexpr SPMCStack() = default; - - void push(T* item) - { - static_assert( - std::is_same>::value, - "T->next must be an std::atomic"); - - return push(item, item); - } - - void push(T* first, T* last) - { - T* old_head = stack.exchange(nullptr, std::memory_order_relaxed); - last->next.store(old_head, std::memory_order_relaxed); - // Assume stays null as not allowed to race with pop or other pushes. - SNMALLOC_ASSERT(stack.load() == nullptr); - stack.store(first, std::memory_order_release); - } - - T* pop() - { - if (stack.load(std::memory_order_relaxed) == nullptr) - return nullptr; - T* old_head = stack.exchange(nullptr); - if (SNMALLOC_UNLIKELY(old_head == nullptr)) - return nullptr; - - auto next = old_head->next.load(std::memory_order_relaxed); - - // Assume stays null as not allowed to race with pop or other pushes. 
- SNMALLOC_ASSERT(stack.load() == nullptr); - - stack.store(next, std::memory_order_release); - - return old_head; - } - - T* pop_all() - { - return stack.exchange(nullptr); - } - }; -} // namespace snmalloc diff --git a/src/mem/corealloc.h b/src/mem/corealloc.h index b22fbb0a..bd42b4e3 100644 --- a/src/mem/corealloc.h +++ b/src/mem/corealloc.h @@ -1,6 +1,5 @@ #pragma once -#include "../backend/chunkallocator.h" #include "../ds/defines.h" #include "allocconfig.h" #include "localcache.h" @@ -282,7 +281,7 @@ namespace snmalloc bumpptr = slab_end; } - ChunkRecord* clear_slab(Metaslab* meta, smallsizeclass_t sizeclass) + void clear_slab(Metaslab* meta, smallsizeclass_t sizeclass) { auto& key = entropy.get_free_list_key(); freelist::Iter<> fl; @@ -324,15 +323,13 @@ namespace snmalloc SNMALLOC_ASSERT( count == snmalloc::sizeclass_to_slab_object_count(sizeclass)); #endif - ChunkRecord* chunk_record = reinterpret_cast(meta); // TODO: This is a capability amplification as we are saying we // have the whole chunk. auto start_of_slab = pointer_align_down( p, snmalloc::sizeclass_to_slab_size(sizeclass)); SNMALLOC_ASSERT( - address_cast(start_of_slab) == - chunk_record->meta_common.chunk_address()); + address_cast(start_of_slab) == meta->meta_common.chunk_address()); #if defined(__CHERI_PURE_CAPABILITY__) && !defined(SNMALLOC_CHECK_CLIENT) // Zero the whole slab. For CHERI we at least need to clear the freelist @@ -340,9 +337,8 @@ namespace snmalloc // the freelist order as for SNMALLOC_CHECK_CLIENT. Zeroing the whole slab // may be more friendly to hw because it does not involve pointer chasing // and is amenable to prefetching. - chunk_record->meta_common - .template zero_chunk( - snmalloc::sizeclass_to_slab_size(sizeclass)); + meta->meta_common.template zero_chunk( + snmalloc::sizeclass_to_slab_size(sizeclass)); #endif #ifdef SNMALLOC_TRACING @@ -351,7 +347,6 @@ namespace snmalloc #else UNUSED(start_of_slab); #endif - return chunk_record; } template @@ -386,11 +381,11 @@ namespace snmalloc // TODO delay the clear to the next user of the slab, or teardown so // don't touch the cache lines at this point in snmalloc_check_client. 
- auto chunk_record = clear_slab(meta, sizeclass); + clear_slab(meta, sizeclass); SharedStateHandle::dealloc_chunk( get_backend_local_state(), - chunk_record, + meta->meta_common, sizeclass_to_slab_size(sizeclass)); return true; @@ -422,10 +417,8 @@ namespace snmalloc UNUSED(size); #endif - auto slab_record = reinterpret_cast(meta); - SharedStateHandle::dealloc_chunk( - get_backend_local_state(), slab_record, size); + get_backend_local_state(), meta->meta_common, size); return; } diff --git a/src/mem/pool.h b/src/mem/pool.h index c7924c63..141a14c5 100644 --- a/src/mem/pool.h +++ b/src/mem/pool.h @@ -1,11 +1,12 @@ #pragma once -#include "../backend/chunkallocator.h" #include "../ds/flaglock.h" #include "../ds/mpmcstack.h" #include "../pal/pal_concept.h" #include "pooled.h" +#include + namespace snmalloc { /** @@ -132,15 +133,17 @@ namespace snmalloc return p; } - p = ChunkAllocator::alloc_meta_data( - nullptr, std::forward(args)...); + auto raw = + SharedStateHandle::template alloc_meta_data(nullptr, sizeof(T)); - if (p == nullptr) + if (raw == nullptr) { SharedStateHandle::Pal::error( "Failed to initialise thread local allocator."); } + p = new (raw.unsafe_ptr()) T(std::forward(args)...); + FlagLock f(pool.lock); p->list_next = pool.list; pool.list = p; diff --git a/src/test/func/domestication/domestication.cc b/src/test/func/domestication/domestication.cc index 7a5d2c07..5108960f 100644 --- a/src/test/func/domestication/domestication.cc +++ b/src/test/func/domestication/domestication.cc @@ -24,8 +24,6 @@ namespace snmalloc private: using Backend = BackendAllocator; - SNMALLOC_REQUIRE_CONSTINIT - inline static ChunkAllocatorState chunk_allocator_state; SNMALLOC_REQUIRE_CONSTINIT inline static GlobalPoolState alloc_pool; @@ -46,12 +44,6 @@ namespace snmalloc } (); - static ChunkAllocatorState& - get_chunk_allocator_state(Backend::LocalState* = nullptr) - { - return chunk_allocator_state; - } - static GlobalPoolState& pool() { return alloc_pool;
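
The core of this change is the chunk-deallocation interface: the backend's dealloc_chunk no longer takes a ChunkRecord* obtained by reinterpret_cast from the slab's Metaslab; it takes the MetaCommon that is already embedded in that metadata (see the backend.h and corealloc.h hunks above). The standalone sketch below illustrates only that signature/ownership change; MetaCommon, Metaslab and dealloc_chunk here are simplified stand-ins, not snmalloc's real definitions, and the LocalState parameter is omitted to keep the sketch self-contained.

#include <cstddef>
#include <iostream>

// Simplified stand-ins for the snmalloc types involved in this patch.
struct MetaCommon
{
  void* chunk = nullptr; // address of the chunk this metadata describes
};

struct Metaslab
{
  MetaCommon meta_common; // embedded, as in the real Metaslab
};

// New shape, mirroring backend.h after this patch:
// dealloc_chunk(LocalState&, MetaCommon&, size_t), minus LocalState.
void dealloc_chunk(MetaCommon& meta_common, size_t size)
{
  std::cout << "backend reclaims chunk " << meta_common.chunk << " (" << size
            << " bytes) along with its metadata\n";
}

int main()
{
  alignas(64) static char chunk[16384]; // pretend slab-sized chunk
  Metaslab meta;
  meta.meta_common.chunk = chunk;

  // Caller side, mirroring corealloc.h: no ChunkRecord cast, just pass the
  // embedded MetaCommon by reference.
  dealloc_chunk(meta.meta_common, sizeof(chunk));
}

Dropping the wrapper also removes the reinterpret_cast from Metaslab to ChunkRecord that clear_slab and the large-deallocation path previously relied on, which is why clear_slab can now return void.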