* Added lambda lock primitive

* Implement MCS Combining lock

This is a hybrid of Flat Combining and the MCS queue lock. It uses
a queue like the MCS queue lock, but each item additionally
contains a thunk that performs the body of the critical section. This
enables a thread other than the one that issued the request to
perform the work.
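
For illustration, a minimal usage sketch of the new primitive (a
hypothetical counter; CombiningLock and with are the names added by this
change, and the include path is an assumption):

#include "snmalloc/ds/combininglock.h"  // assumed path to the new header
#include <cstddef>

snmalloc::CombiningLock counter_lock;
size_t counter = 0;

void increment()
{
  // The thunk may run on this thread (uncontended fast path or queue head)
  // or be executed by whichever thread currently holds the lock; either
  // way, increment() does not return until the work has been performed.
  snmalloc::with(counter_lock, [&]() { counter++; });
}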

* Add a fast path flag

This update adds a fast-path flag for the uncontended case, reducing
the number of atomic operations required when there is no contention.
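
In outline, when nobody is queued the lock is taken with a single atomic
exchange on the flag and released with a single store. A simplified,
hypothetical sketch of that check (the real logic is
CombineLockNode::attach in combininglock.h below):

#include <atomic>

// Hypothetical stand-in for the lock's two fields.
struct Lock
{
  std::atomic<bool> flag{false};     // fast-path lock flag
  std::atomic<void*> head{nullptr};  // MCS queue head
};

template<typename F>
bool try_fast_path(Lock& lock, F&& f)
{
  // Only attempt the flag when nobody is queued.
  if (lock.head.load(std::memory_order_relaxed) == nullptr &&
      !lock.flag.exchange(true, std::memory_order_acquire))
  {
    f();                                                // run the critical section inline
    lock.flag.store(false, std::memory_order_release);  // single release store to unlock
    return true;
  }
  return false;  // caller falls back to the MCS queue slow path
}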

* CR feedback
This commit is contained in:
Matthew Parkinson 2024-06-28 15:43:17 +01:00 committed by GitHub
Parent 6bd6db5f61
Commit 6af38acd94
No key matching this signature was found
GPG key ID: B5690EEEBB952194
7 changed files with 317 additions and 68 deletions

View file

@@ -96,33 +96,37 @@ namespace snmalloc
// of allocators.
SNMALLOC_SLOW_PATH static void ensure_init_slow()
{
FlagLock lock{initialisation_lock};
#ifdef SNMALLOC_TRACING
message<1024>("Run init_impl");
#endif
if (initialised)
return;
LocalEntropy entropy;
entropy.init<Pal>();
// Initialise key for remote deallocation lists
RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key());
with(initialisation_lock, [&]() {
#ifdef SNMALLOC_TRACING
message<1024>("Run init_impl");
#endif
// Need to randomise pagemap location. If requested and not a
// StrictProvenance architecture, randomize its table's location within a
// significantly larger address space allocation.
static constexpr bool pagemap_randomize =
mitigations(random_pagemap) && !aal_supports<StrictProvenance>;
if (initialised)
return;
Pagemap::concretePagemap.template init<pagemap_randomize>();
LocalEntropy entropy;
entropy.init<Pal>();
// Initialise key for remote deallocation lists
RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key());
if constexpr (aal_supports<StrictProvenance>)
{
Authmap::init();
}
// Need to randomise pagemap location. If requested and not a
// StrictProvenance architecture, randomize its table's location within
// a significantly larger address space allocation.
static constexpr bool pagemap_randomize =
mitigations(random_pagemap) && !aal_supports<StrictProvenance>;
initialised.store(true, std::memory_order_release);
Pagemap::concretePagemap.template init<pagemap_randomize>();
if constexpr (aal_supports<StrictProvenance>)
{
Authmap::init();
}
initialised.store(true, std::memory_order_release);
});
}
public:
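
For readability, the shape of ensure_init_slow after this change,
reassembled from the interleaved diff above (condensed; the tracing
output is elided):

SNMALLOC_SLOW_PATH static void ensure_init_slow()
{
  with(initialisation_lock, [&]() {
    if (initialised)
      return;  // returns from the thunk only: another thread already ran init

    LocalEntropy entropy;
    entropy.init<Pal>();

    // Initialise key for remote deallocation lists
    RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key());

    // Randomise the pagemap location if requested and not a
    // StrictProvenance architecture.
    static constexpr bool pagemap_randomize =
      mitigations(random_pagemap) && !aal_supports<StrictProvenance>;
    Pagemap::concretePagemap.template init<pagemap_randomize>();

    if constexpr (aal_supports<StrictProvenance>)
      Authmap::init();

    initialised.store(true, std::memory_order_release);
  });
}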

View file

@@ -22,7 +22,7 @@ namespace snmalloc
* This is infrequently used code, a spin lock simplifies the code
* considerably, and should never be on the fast path.
*/
FlagWord spin_lock{};
CombiningLock spin_lock{};
public:
static constexpr bool Aligned = ParentRange::Aligned;
@@ -35,14 +35,18 @@ namespace snmalloc
CapPtr<void, ChunkBounds> alloc_range(size_t size)
{
FlagLock lock(spin_lock);
return parent.alloc_range(size);
CapPtr<void, ChunkBounds> result;
with(spin_lock, [&]() {
{
result = parent.alloc_range(size);
}
});
return result;
}
void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
{
FlagLock lock(spin_lock);
parent.dealloc_range(base, size);
with(spin_lock, [&]() { parent.dealloc_range(base, size); });
}
};
};
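
The same refactoring pattern recurs throughout this change: with() takes a
void thunk, so a critical section that produces a value writes it into a
captured local instead of returning it directly, e.g. (condensed from the
hunk above):

CapPtr<void, ChunkBounds> alloc_range(size_t size)
{
  CapPtr<void, ChunkBounds> result;
  with(spin_lock, [&]() { result = parent.alloc_range(size); });
  return result;
}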

View file

@@ -0,0 +1,224 @@
#pragma once
#include "../aal/aal.h"
#include "../pal/pal.h"
#include <atomic>
#include <functional>
namespace snmalloc
{
class CombineLockNode;
struct CombiningLock
{
// Fast path lock in case there is no contention.
std::atomic<bool> flag{false};
// MCS queue of work items
std::atomic<CombineLockNode*> head{nullptr};
};
/**
* @brief Combination of the MCS queue lock with Flat Combining
*
* Each element in the queue has a pointer to a work item.
* This means that, under contention, the thread holding the lock
* can perform the work on behalf of the waiting threads.
*
* As the work items are arbitrary lambdas, there are no simplifications
* for combining related work items. E.g. the original Flat Combining
* paper might sort a collection of inserts and perform them in a single
* traversal.
*
* Note that we should perhaps add a Futex/WakeOnAddress mode to improve
* performance in the contended case, rather than spinning.
*/
class CombineLockNode
{
template<typename F>
friend class CombineLockNodeTempl;
enum class LockStatus
{
// The work for this node has not been completed.
WAITING,
// The work for this node has been completed, and it is not the
// last element in the queue.
DONE,
// The work for this node has not been completed, and it is now the
// head of the queue.
READY
};
// Status of this node in the queue. Set by the thread at the head of
// the queue when it either makes this node the new head of the queue
// or completes this node's work.
std::atomic<LockStatus> status{LockStatus::WAITING};
// Used to store the queue
std::atomic<CombineLockNode*> next{nullptr};
// Stores the C++ lambda associated with this node in the queue.
void (*f_raw)(CombineLockNode*);
void release(CombiningLock& lock)
{
lock.flag.store(false, std::memory_order_release);
}
void set_status(LockStatus s)
{
status.store(s, std::memory_order_release);
}
constexpr CombineLockNode(void (*f)(CombineLockNode*)) : f_raw(f) {}
SNMALLOC_FAST_PATH void attach(CombiningLock& lock)
{
// Test if no one is waiting
if (lock.head.load(std::memory_order_relaxed) == nullptr)
{
// No one was waiting so low contention. Attempt to acquire the flag
// lock.
if (lock.flag.exchange(true, std::memory_order_acquire) == false)
{
// We grabbed the lock.
f_raw(this);
// Release the lock
release(lock);
return;
}
}
attach_slow(lock);
}
SNMALLOC_SLOW_PATH void attach_slow(CombiningLock& lock)
{
// There is contention for the lock, we need to add our work to the
// queue of pending work
auto prev = lock.head.exchange(this, std::memory_order_acq_rel);
if (prev != nullptr)
{
// If we aren't the head, link into predecessor
prev->next.store(this, std::memory_order_release);
// Wait for the predecessor to complete
while (status.load(std::memory_order_relaxed) == LockStatus::WAITING)
Aal::pause();
// Determine if another thread completed our work.
if (status.load(std::memory_order_acquire) == LockStatus::DONE)
return;
}
else
{
// We are the head of the queue. Spin until we acquire the fast path
// lock. As we are in the queue, future requests shouldn't try to
// acquire the fast path lock, but stale views of the queue being empty
// could still be concurrent with this thread.
while (lock.flag.exchange(true, std::memory_order_acquire))
{
while (lock.flag.load(std::memory_order_relaxed))
{
Aal::pause();
}
}
// We could set
//   status = LockStatus::READY
// here; however, the subsequent code assumes it is READY, and
// nothing would read it.
}
// We are the head of the queue, and responsible for
// waking/performing our and subsequent work.
auto curr = this;
while (true)
{
// Perform work for head of the queue
curr->f_raw(curr);
// Determine if there are more elements.
auto n = curr->next.load(std::memory_order_acquire);
if (n != nullptr)
{
// Signal this work was completed and move on to
// next item.
curr->set_status(LockStatus::DONE);
curr = n;
continue;
}
// This could be the end of the queue, attempt to close the
// queue.
auto curr_c = curr;
if (lock.head.compare_exchange_strong(
curr_c,
nullptr,
std::memory_order_release,
std::memory_order_relaxed))
{
// Queue was successfully closed.
// Notify last element the work was completed.
curr->set_status(LockStatus::DONE);
release(lock);
return;
}
// Failed to close the queue; wait for the next thread to be
// added.
while (curr->next.load(std::memory_order_relaxed) == nullptr)
Aal::pause();
// As we had to wait, give the job to the next thread
// to carry on performing the work.
n = curr->next.load(std::memory_order_acquire);
n->set_status(LockStatus::READY);
// Notify the thread that we completed its work.
// Note that this needs to be done last, as we can't read
// curr->next after setting curr->status
curr->set_status(LockStatus::DONE);
return;
}
}
};
template<typename F>
class CombineLockNodeTempl : CombineLockNode
{
template<typename FF>
friend void with(CombiningLock&, FF&&);
// This holds the closure for the lambda
F f;
// Untyped version of calling f to store in the node.
static void invoke(CombineLockNode* self)
{
auto self_templ = reinterpret_cast<CombineLockNodeTempl*>(self);
self_templ->f();
}
CombineLockNodeTempl(CombiningLock& lock, F&& f_)
: CombineLockNode(invoke), f(f_)
{
attach(lock);
}
};
/**
* Lock primitive. This takes a reference to a CombiningLock and a thunk
* to call once the lock is available. The thunk should not depend on the
* identity of the current thread, as it may be executed by a different thread.
*/
template<typename F>
inline void with(CombiningLock& lock, F&& f)
{
CombineLockNodeTempl<F> node{lock, std::forward<F>(f)};
}
} // namespace snmalloc
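
A usage sketch of with() under contention (a hypothetical test, not part
of this change). The thunk may be executed by whichever thread ends up at
the head of the queue, but with() does not return until the thunk has run,
so capturing locals by reference is safe; the thunk must simply not rely
on thread identity (e.g. thread_local state or thread IDs).

#include <cstddef>
#include <thread>
#include <vector>
// assumes the combininglock.h header above is on the include path

snmalloc::CombiningLock lock;
size_t total = 0;

int main()
{
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; i++)
  {
    threads.emplace_back([]() {
      for (int j = 0; j < 1000; j++)
      {
        size_t contribution = 1;  // a stack local is fine: with() blocks
                                  // until this thread's thunk has run
        snmalloc::with(lock, [&]() { total += contribution; });
      }
    });
  }
  for (auto& t : threads)
    t.join();
  // total == 4000 once all threads have joined
}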

View file

@@ -6,6 +6,7 @@
#include "../pal/pal.h"
#include "aba.h"
#include "allocconfig.h"
#include "combininglock.h"
#include "entropy.h"
#include "flaglock.h"
#include "mpmcstack.h"

View file

@@ -133,4 +133,11 @@ namespace snmalloc
lock.flag.store(false, std::memory_order_release);
}
};
template<typename F>
inline void with(FlagWord& lock, F&& f)
{
FlagLock l(lock);
f();
}
} // namespace snmalloc
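
Presumably the FlagWord overload is provided so that call sites can use the
same with(...) syntax whichever lock type guards them; a hypothetical
illustration (GuardedCounter is not part of this change):

#include <cstddef>
// assumes flaglock.h and combininglock.h are included

template<typename LockT>
class GuardedCounter
{
  LockT lock{};
  size_t value{0};

public:
  void add(size_t n)
  {
    // Overload resolution (via ADL) picks the FlagWord or CombiningLock
    // version of with().
    with(lock, [&]() { value += n; });
  }
};

// Both GuardedCounter<snmalloc::FlagWord> and
// GuardedCounter<snmalloc::CombiningLock> compile against the same body.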

View file

@@ -35,14 +35,15 @@ namespace snmalloc
if (SNMALLOC_UNLIKELY(!initialised.load(std::memory_order_acquire)))
{
FlagLock lock(flag);
if (!initialised)
{
init(&obj);
initialised.store(true, std::memory_order_release);
if (first != nullptr)
*first = true;
}
with(flag, [&]() {
if (!initialised)
{
init(&obj);
initialised.store(true, std::memory_order_release);
if (first != nullptr)
*first = true;
}
});
}
return obj;
}
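
The change above preserves the double-checked pattern: an acquire load on
the fast path, and a re-check inside the critical section so only one
thread runs init. A stripped-down, hypothetical restatement (the release
store that publishes initialisation pairs with the acquire load, so readers
that skip the lock still see the initialised data):

#include <atomic>
// assumes flaglock.h (with the with() overload above) is included

std::atomic<bool> ready{false};
snmalloc::FlagWord init_lock{};  // lock type assumed for this sketch
int config_value;

int get_config()
{
  if (!ready.load(std::memory_order_acquire))
  {
    snmalloc::with(init_lock, [&]() {
      if (!ready)  // re-check: another thread may have initialised already
      {
        config_value = 42;  // placeholder for the real initialisation
        ready.store(true, std::memory_order_release);
      }
    });
  }
  return config_value;
}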

View file

@@ -100,8 +100,9 @@ namespace snmalloc
static T* acquire()
{
PoolState<T>& pool = get_state();
{
FlagLock f(pool.lock);
T* result{nullptr};
with(pool.lock, [&]() {
if (pool.front != nullptr)
{
auto p = pool.front;
@@ -112,17 +113,21 @@
}
pool.front = next;
p->set_in_use();
return p.unsafe_ptr();
result = p.unsafe_ptr();
}
}
});
if (result != nullptr)
return result;
auto p = ConstructT::make();
FlagLock f(pool.lock);
p->list_next = pool.list;
pool.list = p;
with(pool.lock, [&]() {
p->list_next = pool.list;
pool.list = p;
p->set_in_use();
p->set_in_use();
});
return p.unsafe_ptr();
}
@@ -146,11 +151,13 @@ namespace snmalloc
// Returns a linked list of all objects in the stack, emptying the stack.
if (p == nullptr)
{
FlagLock f(pool.lock);
auto result = pool.front;
pool.front = nullptr;
pool.back = nullptr;
return result.unsafe_ptr();
T* result;
with(pool.lock, [&]() {
result = pool.front.unsafe_ptr();
pool.front = nullptr;
pool.back = nullptr;
});
return result;
}
return p->next.unsafe_ptr();
@@ -165,18 +172,18 @@
{
PoolState<T>& pool = get_state();
last->next = nullptr;
FlagLock f(pool.lock);
with(pool.lock, [&]() {
if (pool.front == nullptr)
{
pool.front = capptr::Alloc<T>::unsafe_from(first);
}
else
{
pool.back->next = capptr::Alloc<T>::unsafe_from(first);
}
if (pool.front == nullptr)
{
pool.front = capptr::Alloc<T>::unsafe_from(first);
}
else
{
pool.back->next = capptr::Alloc<T>::unsafe_from(first);
}
pool.back = capptr::Alloc<T>::unsafe_from(last);
pool.back = capptr::Alloc<T>::unsafe_from(last);
});
}
/**
@@ -188,18 +195,19 @@
{
PoolState<T>& pool = get_state();
last->next = nullptr;
FlagLock f(pool.lock);
if (pool.front == nullptr)
{
pool.back = capptr::Alloc<T>::unsafe_from(last);
}
else
{
last->next = pool.front;
pool.back->next = capptr::Alloc<T>::unsafe_from(first);
}
pool.front = capptr::Alloc<T>::unsafe_from(first);
with(pool.lock, [&]() {
if (pool.front == nullptr)
{
pool.back = capptr::Alloc<T>::unsafe_from(last);
}
else
{
last->next = pool.front;
pool.back->next = capptr::Alloc<T>::unsafe_from(first);
}
pool.front = capptr::Alloc<T>::unsafe_from(first);
});
}
static T* iterate(T* p = nullptr)