custom memmove implementation proposal. (#593)

mostly like memcpy with optional bound checking but
capable of handling overlapping cases thus using
reverse copy instead.
This commit is contained in:
David CARLIER 2024-11-16 07:39:38 +00:00 коммит произвёл GitHub
Родитель f7fe702f77
Коммит 01885f5a04
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 133 добавлений и 3 удалений

Просмотреть файл

@ -42,6 +42,16 @@ namespace snmalloc
}
}
template<size_t Size, size_t PrefetchOffset = 0>
SNMALLOC_FAST_PATH_INLINE void
block_reverse_copy(void* dst, const void* src, size_t len)
{
for (size_t i = (len - 1); int64_t(i + Size) >= 0; i -= Size)
{
copy_one<Size>(pointer_offset(dst, i), pointer_offset(src, i));
}
}
/**
* Perform an overlapping copy of the end. This will copy one (potentially
* unaligned) `T` from the end of the source to the end of the destination.
@ -459,4 +469,42 @@ namespace snmalloc
Arch::copy(dst, src, len);
return orig_dst;
}
template<
bool Checked,
bool ReadsChecked = CheckReads,
typename Arch = DefaultArch>
SNMALLOC_FAST_PATH_INLINE void*
memmove(void* dst, const void* src, size_t len)
{
auto orig_dst = dst;
// we don't need to do external
// pointer checks if we hit it. It's also the fastest case, to encourage
// the compiler to favour the other cases.
if (SNMALLOC_UNLIKELY(len == 0 || dst == src))
{
return dst;
}
// Check the bounds of the arguments.
if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len)))
return report_fatal_bounds_error(
src, len, "memmove with source out of bounds of heap allocation");
if (SNMALLOC_UNLIKELY(!check_bounds<Checked>(dst, len)))
return report_fatal_bounds_error(
dst, len, "memmove with destination out of bounds of heap allocation");
if ((address_cast(dst) - address_cast(src)) < len)
{
// slow 'safe' reverse copy, we avoid optimised rollouts
// to cope with typical memmove use cases, moving
// one element to another address within the same
// contiguous space.
block_reverse_copy<1>(dst, src, len);
return dst;
}
Arch::copy(dst, src, len);
return orig_dst;
}
} // namespace snmalloc

Просмотреть файл

@ -10,4 +10,13 @@ extern "C"
{
return snmalloc::memcpy<true>(dst, src, len);
}
/**
* Snmalloc checked memmove.
*/
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(memmove)(void* dst, const void* src, size_t len)
{
return snmalloc::memmove<true>(dst, src, len);
}
}

Просмотреть файл

@ -68,15 +68,24 @@ extern "C" void abort()
* fills one with a well-known pattern, and then copies subsets of this at
* one-byte increments to a target. This gives us unaligned starts.
*/
template<bool overlap>
void check_size(size_t size)
{
START_TEST("checking {}-byte memcpy", size);
if constexpr (!overlap)
{
START_TEST("checking {}-byte memcpy", size);
}
else
{
START_TEST("checking {}-byte memmove", size);
}
auto* s = static_cast<unsigned char*>(my_malloc(size + 1));
auto* d = static_cast<unsigned char*>(my_malloc(size + 1));
d[size] = 0;
s[size] = 255;
for (size_t start = 0; start < size; start++)
{
void* ret;
unsigned char* src = s + start;
unsigned char* dst = d + start;
size_t sz = (size - start);
@ -88,7 +97,14 @@ void check_size(size_t size)
{
dst[i] = 0;
}
void* ret = my_memcpy(dst, src, sz);
if constexpr (!overlap)
{
ret = my_memcpy(dst, src, sz);
}
else
{
ret = my_memmove(dst, src, sz);
}
EXPECT(ret == dst, "Return value should be {}, was {}", dst, ret);
for (size_t i = 0; i < sz; ++i)
{
@ -147,6 +163,50 @@ void check_bounds(size_t size, size_t out_of_bounds)
my_free(d);
}
void check_overlaps1()
{
size_t size = 16;
START_TEST("memmove overlaps1");
auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
for (size_t i = 0; i < size; ++i)
{
s[i] = static_cast<unsigned int>(i);
}
my_memmove(&s[2], &s[4], sizeof(s[0]));
EXPECT(s[2] == s[4], "overlap error: {} {}", s[2], s[4]);
my_memmove(&s[15], &s[5], sizeof(s[0]));
EXPECT(s[15] == s[5], "overlap error: {} {}", s[15], s[5]);
auto ptr = s;
my_memmove(ptr, s, size * sizeof(s[0]));
EXPECT(ptr == s, "overlap error: {} {}", ptr, s);
my_free(s);
}
template<bool after>
void check_overlaps2(size_t size)
{
START_TEST("memmove overlaps2, size {}", size);
auto sz = size / 2;
auto offset = size / 2;
auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
for (size_t i = 0; i < size; ++i)
{
s[i] = static_cast<unsigned int>(i);
}
auto dst = after ? s + offset : s;
auto src = after ? s : s + offset;
size_t i = after ? 0 : offset;
size_t u = 0;
my_memmove(dst, src, sz * sizeof(unsigned int));
while (u < sz)
{
EXPECT(dst[u] == i, "overlap error: {} {}", dst[u], i);
u++;
i++;
}
my_free(s);
}
int main()
{
// Skip the checks that expect bounds checks to fail when we are not the
@ -175,7 +235,20 @@ int main()
# endif
for (size_t x = 0; x < 2048; x++)
{
check_size(x);
check_size<false>(x);
}
for (size_t x = 0; x < 2048; x++)
{
check_size<true>(x);
}
check_overlaps1();
for (size_t x = 8; x < 256; x += 64)
{
check_overlaps2<false>(x);
check_overlaps2<true>(x);
}
}
#endif