Provide consistent alignment to `swap_ranges` benchmark (#5043)

Co-authored-by: Casey Carter <cartec69@gmail.com>
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
This commit is contained in:
Alex Guteniev 2024-10-30 07:45:12 -07:00 коммит произвёл GitHub
Родитель cb1e359f93
Коммит 51d34c4b78
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 120 добавлений и 17 удалений

Просмотреть файл

@ -0,0 +1,76 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#include <cstddef>
#include <cstdlib>
#include <new>
// Minimal allocator for benchmarks: every block it returns starts exactly `Skew` bytes past an
// `Alignment`-aligned address, so the data's alignment is the same on every run.
//
// Template parameters:
//   T         - element type.
//   Alignment - alignment of the underlying raw block, in bytes; must be a power of two
//               and a multiple of alignof(T).
//   Skew      - byte offset added to the aligned address; must be a multiple of alignof(T).
//
// The allocator is stateless: all instances with the same Alignment and Skew compare equal.
template <class T, size_t Alignment, size_t Skew>
struct skewed_allocator {
    using value_type = T;

    static_assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0, "Alignment must be a power of two");
    static_assert(Alignment % alignof(T) == 0, "Chosen Alignment will produce unaligned T objects");
    static_assert(Skew % alignof(T) == 0, "Chosen Skew will produce unaligned T objects");

    // allocator_traits cannot deduce rebinding for templates with non-type parameters,
    // so it has to be spelled out.
    template <class U>
    struct rebind {
        using other = skewed_allocator<U, Alignment, Skew>;
    };

    skewed_allocator() = default;

    // Converting constructor required of allocators; there is no state to copy.
    template <class U>
    skewed_allocator(const skewed_allocator<U, Alignment, Skew>&) noexcept {}

    template <class U>
    bool operator==(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return true;
    }

    // Needed before C++20, where `!=` is not synthesized from `==`.
    template <class U>
    bool operator!=(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return false;
    }

    // Returns storage for n objects of type T, located Skew bytes past an Alignment-aligned address.
    // Throws std::bad_array_new_length (derived from std::bad_alloc) if the byte count would overflow,
    // and std::bad_alloc if the underlying allocation fails.
    T* allocate(const size_t n) {
        // Largest n for which n * sizeof(T) + Skew still fits in size_t.
        constexpr size_t max_count = (static_cast<size_t>(-1) - Skew) / sizeof(T);
        if (n > max_count) {
            throw std::bad_array_new_length{};
        }

        // Aligned operator new reports failure by throwing std::bad_alloc itself.
        void* const raw = ::operator new(n * sizeof(T) + Skew, static_cast<std::align_val_t>(Alignment));
        return reinterpret_cast<T*>(static_cast<unsigned char*>(raw) + Skew);
    }

    // Releases storage previously returned by allocate(); a null pointer is ignored.
    void deallocate(T* const p, size_t) noexcept {
        if (p) {
            // Undo the skew to recover the address that was actually allocated.
            ::operator delete(reinterpret_cast<unsigned char*>(p) - Skew, static_cast<std::align_val_t>(Alignment));
        }
    }
};
// Benchmarks need behavior that is reproducible from run to run.
// For practical performance purposes 64 bytes would be a sensible alignment:
// it is both the cache line size and the largest vector instruction size (on x64).
// Aligning to the page size goes further and makes results even more consistent,
// because every run then crosses the same number of page boundaries.
inline constexpr size_t page_size{4096};

// A realistic offset from the allocation granularity: what a variable gets when it is placed
// next to a pointer in a structure or on the stack. It also matches the default packing.
inline constexpr size_t realistic_skew{8};
// Allocator whose blocks start exactly on a page_size boundary (zero skew).
template <class T>
using highly_aligned_allocator = skewed_allocator<T, page_size, 0>;
// Allocator whose blocks start realistic_skew bytes past a page_size boundary.
template <class T>
using not_highly_aligned_allocator = skewed_allocator<T, page_size, realistic_skew>;
// Wrappers that give an object of automatic or static storage duration a known position
// relative to a page_size boundary. The padding they introduce is intentional,
// so the corresponding warning is silenced for these two definitions only.
#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
// `value` is the first member of an object aligned to page_size.
template <class T>
struct alignas(page_size) highly_aligned {
T value;
};
// `value` follows realistic_skew bytes of padding inside an object aligned to page_size.
// NOTE(review): `value` sits exactly realistic_skew bytes in only while alignof(T) <= realistic_skew;
// a more strictly aligned T would get extra padding - confirm before using with such types.
template <class T>
struct alignas(page_size) not_highly_aligned {
char pad[realistic_skew]; // moves `value` off the page boundary
T value;
};
#pragma warning(pop)

Просмотреть файл

@ -7,13 +7,17 @@
#include <cstdint>
#include <vector>
#include "skewed_allocator.hpp"
using namespace std;
template <size_t N, class T>
template <size_t N, class T, template <class> class Padder>
void std_swap(benchmark::State& state) {
T a[N];
Padder<T[N]> padded_a;
auto& a = padded_a.value;
memset(a, 'a', sizeof(a));
T b[N];
Padder<T[N]> padded_b;
auto& b = padded_b.value;
memset(b, 'b', sizeof(b));
for (auto _ : state) {
@ -23,10 +27,10 @@ void std_swap(benchmark::State& state) {
}
}
template <class T>
template <class T, template <class> class Alloc>
void std_swap_ranges(benchmark::State& state) {
vector<T> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T> b(static_cast<size_t>(state.range(0)), T{'b'});
vector<T, Alloc<T>> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T, Alloc<T>> b(static_cast<size_t>(state.range(0)), T{'b'});
for (auto _ : state) {
swap_ranges(a.begin(), a.end(), b.begin());
@ -35,18 +39,41 @@ void std_swap_ranges(benchmark::State& state) {
}
}
BENCHMARK(std_swap<1, uint8_t>);
BENCHMARK(std_swap<5, uint8_t>);
BENCHMARK(std_swap<15, uint8_t>);
BENCHMARK(std_swap<26, uint8_t>);
BENCHMARK(std_swap<38, uint8_t>);
BENCHMARK(std_swap<60, uint8_t>);
BENCHMARK(std_swap<125, uint8_t>);
BENCHMARK(std_swap<800, uint8_t>);
BENCHMARK(std_swap<3000, uint8_t>);
BENCHMARK(std_swap<9000, uint8_t>);
BENCHMARK(std_swap<1, uint8_t, highly_aligned>);
BENCHMARK(std_swap<5, uint8_t, highly_aligned>);
BENCHMARK(std_swap<15, uint8_t, highly_aligned>);
BENCHMARK(std_swap<26, uint8_t, highly_aligned>);
BENCHMARK(std_swap<38, uint8_t, highly_aligned>);
BENCHMARK(std_swap<60, uint8_t, highly_aligned>);
BENCHMARK(std_swap<125, uint8_t, highly_aligned>);
BENCHMARK(std_swap<800, uint8_t, highly_aligned>);
BENCHMARK(std_swap<3000, uint8_t, highly_aligned>);
BENCHMARK(std_swap<9000, uint8_t, highly_aligned>);
BENCHMARK(std_swap_ranges<uint8_t>)
BENCHMARK(std_swap<1, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<5, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<15, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<26, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<38, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<60, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<125, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<800, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<3000, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<9000, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap_ranges<uint8_t, highly_aligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)
->Arg(26)
->Arg(38)
->Arg(60)
->Arg(125)
->Arg(800)
->Arg(3000)
->Arg(9000);
BENCHMARK(std_swap_ranges<uint8_t, not_highly_aligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)