зеркало из https://github.com/microsoft/STL.git
Provide consistent alignment to `swap_ranges` benchmark (#5043)
Co-authored-by: Casey Carter <cartec69@gmail.com> Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
This commit is contained in:
Родитель
cb1e359f93
Коммит
51d34c4b78
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <new>
|
||||
|
||||
// Minimal allocator for benchmarks: every block it hands out starts exactly
// `Skew` bytes past an address that is aligned to `Alignment`, so the placement
// of the benchmarked buffers is reproducible from run to run.
//
// Template parameters:
//   T         - element type of the allocation.
//   Alignment - alignment, in bytes, of the underlying raw allocation (power of two).
//   Skew      - byte offset added to the aligned base address before it is returned.
//
// The allocator is stateless; all instances with the same Alignment/Skew are
// interchangeable and compare equal.
template <class T, std::size_t Alignment, std::size_t Skew>
struct skewed_allocator {
    using value_type = T;

    static_assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0, "Alignment must be a power of two");
    static_assert(Alignment % alignof(T) == 0, "Chosen Alignment will produce unaligned T objects");
    static_assert(Skew % alignof(T) == 0, "Chosen Skew will produce unaligned T objects");

    // An explicit rebind is needed: allocator_traits can only rebind automatically
    // when all template parameters after the first are type parameters.
    template <class U>
    struct rebind {
        using other = skewed_allocator<U, Alignment, Skew>;
    };

    skewed_allocator() = default;

    // Converting constructor used when a container rebinds the allocator.
    template <class U>
    skewed_allocator(const skewed_allocator<U, Alignment, Skew>&) noexcept {}

    // Stateless allocator: any two instances can free each other's memory.
    template <class U>
    bool operator==(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return true;
    }

    // Spelled out so that `a != b` is also available before C++20.
    template <class U>
    bool operator!=(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return false;
    }

    // Allocates storage for `n` objects of type T.
    // Returns a pointer located `Skew` bytes past an `Alignment`-aligned address.
    // Throws std::bad_array_new_length (derived from std::bad_alloc) if the byte
    // count would overflow, and std::bad_alloc if the allocation itself fails.
    [[nodiscard]] T* allocate(const std::size_t n) {
        constexpr std::size_t max_bytes = static_cast<std::size_t>(-1);
        if (n > (max_bytes - Skew) / sizeof(T)) {
            throw std::bad_array_new_length{}; // n * sizeof(T) + Skew would wrap around
        }

        // Aligned operator new reports failure by throwing std::bad_alloc itself.
        void* const base = ::operator new(n * sizeof(T) + Skew, std::align_val_t{Alignment});
        return reinterpret_cast<T*>(static_cast<unsigned char*>(base) + Skew);
    }

    // Releases storage previously obtained from allocate(); null pointers are ignored.
    void deallocate(T* const p, std::size_t) noexcept {
        if (p) {
            // Undo the skew to recover the address that operator new returned.
            ::operator delete(reinterpret_cast<unsigned char*>(p) - Skew, std::align_val_t{Alignment});
        }
    }
};
|
||||
|
||||
// Alignment used to give benchmark buffers a consistent placement.
// 64 would already be a reasonable alignment for practical perf uses, as it is both
// the cache line size and the maximum vector instruction size (on x64).
// Aligning to the page size provides even more consistency, because the same number
// of page boundaries is then crossed on every run.
// The value is hard-coded as 4096 bytes rather than queried from the OS.
inline constexpr std::size_t page_size = 4096;
|
||||
|
||||
// A realistic skew, in bytes, relative to the allocation granularity: it models a
// variable that is placed next to a pointer in a structure or on the stack.
// It also corresponds to the default packing.
inline constexpr std::size_t realistic_skew = 8;
|
||||
|
||||
template <class T>
|
||||
using highly_aligned_allocator = skewed_allocator<T, page_size, 0>;
|
||||
|
||||
template <class T>
|
||||
using not_highly_aligned_allocator = skewed_allocator<T, page_size, realistic_skew>;
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
|
||||
|
||||
template <class T>
|
||||
struct alignas(page_size) highly_aligned {
|
||||
T value;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct alignas(page_size) not_highly_aligned {
|
||||
char pad[realistic_skew];
|
||||
T value;
|
||||
};
|
||||
|
||||
#pragma warning(pop)
|
|
@ -7,13 +7,17 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "skewed_allocator.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <size_t N, class T>
|
||||
template <size_t N, class T, template <class> class Padder>
|
||||
void std_swap(benchmark::State& state) {
|
||||
T a[N];
|
||||
Padder<T[N]> padded_a;
|
||||
auto& a = padded_a.value;
|
||||
memset(a, 'a', sizeof(a));
|
||||
T b[N];
|
||||
Padder<T[N]> padded_b;
|
||||
auto& b = padded_b.value;
|
||||
memset(b, 'b', sizeof(b));
|
||||
|
||||
for (auto _ : state) {
|
||||
|
@ -23,10 +27,10 @@ void std_swap(benchmark::State& state) {
|
|||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <class T, template <class> class Alloc>
|
||||
void std_swap_ranges(benchmark::State& state) {
|
||||
vector<T> a(static_cast<size_t>(state.range(0)), T{'a'});
|
||||
vector<T> b(static_cast<size_t>(state.range(0)), T{'b'});
|
||||
vector<T, Alloc<T>> a(static_cast<size_t>(state.range(0)), T{'a'});
|
||||
vector<T, Alloc<T>> b(static_cast<size_t>(state.range(0)), T{'b'});
|
||||
|
||||
for (auto _ : state) {
|
||||
swap_ranges(a.begin(), a.end(), b.begin());
|
||||
|
@ -35,18 +39,41 @@ void std_swap_ranges(benchmark::State& state) {
|
|||
}
|
||||
}
|
||||
|
||||
BENCHMARK(std_swap<1, uint8_t>);
|
||||
BENCHMARK(std_swap<5, uint8_t>);
|
||||
BENCHMARK(std_swap<15, uint8_t>);
|
||||
BENCHMARK(std_swap<26, uint8_t>);
|
||||
BENCHMARK(std_swap<38, uint8_t>);
|
||||
BENCHMARK(std_swap<60, uint8_t>);
|
||||
BENCHMARK(std_swap<125, uint8_t>);
|
||||
BENCHMARK(std_swap<800, uint8_t>);
|
||||
BENCHMARK(std_swap<3000, uint8_t>);
|
||||
BENCHMARK(std_swap<9000, uint8_t>);
|
||||
BENCHMARK(std_swap<1, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<5, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<15, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<26, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<38, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<60, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<125, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<800, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<3000, uint8_t, highly_aligned>);
|
||||
BENCHMARK(std_swap<9000, uint8_t, highly_aligned>);
|
||||
|
||||
BENCHMARK(std_swap_ranges<uint8_t>)
|
||||
BENCHMARK(std_swap<1, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<5, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<15, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<26, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<38, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<60, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<125, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<800, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<3000, uint8_t, not_highly_aligned>);
|
||||
BENCHMARK(std_swap<9000, uint8_t, not_highly_aligned>);
|
||||
|
||||
BENCHMARK(std_swap_ranges<uint8_t, highly_aligned_allocator>)
|
||||
->Arg(1)
|
||||
->Arg(5)
|
||||
->Arg(15)
|
||||
->Arg(26)
|
||||
->Arg(38)
|
||||
->Arg(60)
|
||||
->Arg(125)
|
||||
->Arg(800)
|
||||
->Arg(3000)
|
||||
->Arg(9000);
|
||||
|
||||
BENCHMARK(std_swap_ranges<uint8_t, not_highly_aligned_allocator>)
|
||||
->Arg(1)
|
||||
->Arg(5)
|
||||
->Arg(15)
|
||||
|
|
Загрузка…
Ссылка в новой задаче