diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h
index 3054484d..2e7440cc 100644
--- a/src/snmalloc/global/memcpy.h
+++ b/src/snmalloc/global/memcpy.h
@@ -42,6 +42,24 @@ namespace snmalloc
     }
   }
 
+  /**
+   * Copy `len` bytes from `src` to `dst` in `Size`-byte units, starting at
+   * the end of the range and working towards the start, so that a
+   * destination that overlaps the source at a higher address is not
+   * clobbered before it has been read. Any `len % Size` bytes at the start
+   * of the range are not copied.
+   */
+  template
+  SNMALLOC_FAST_PATH_INLINE void
+  block_reverse_copy(void* dst, const void* src, size_t len)
+  {
+    for (size_t i = len; i >= Size; i -= Size)
+    {
+      size_t offset = i - Size;
+      copy_one(pointer_offset(dst, offset), pointer_offset(src, offset));
+    }
+  }
+
   /**
    * Perform an overlapping copy of the end. This will copy one (potentially
    * unaligned) `T` from the end of the source to the end of the destination.
@@ -459,4 +477,53 @@ namespace snmalloc
     Arch::copy(dst, src, len);
     return orig_dst;
   }
+
+  /**
+   * Snmalloc memmove: copy `len` bytes from `src` to `dst`, where the two
+   * ranges are allowed to overlap. Both ranges are passed to `check_bounds`
+   * first (the source check is only enabled when both `Checked` and
+   * `ReadsChecked` are set) and a failure is reported as a fatal bounds
+   * error. Returns the original `dst`.
+   */
+  template<
+    bool Checked,
+    bool ReadsChecked = CheckReads,
+    typename Arch = DefaultArch>
+  SNMALLOC_FAST_PATH_INLINE void*
+  memmove(void* dst, const void* src, size_t len)
+  {
+    auto orig_dst = dst;
+    // Nothing to copy for a zero length or identical pointers, so we don't
+    // need to do external pointer checks if we hit it. It's also the fastest
+    // case, to encourage the compiler to favour the other cases.
+    if (SNMALLOC_UNLIKELY(len == 0 || dst == src))
+    {
+      return dst;
+    }
+
+    // Check the bounds of the arguments.
+    if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len)))
+      return report_fatal_bounds_error(
+        src, len, "memmove with source out of bounds of heap allocation");
+    if (SNMALLOC_UNLIKELY(!check_bounds(dst, len)))
+      return report_fatal_bounds_error(
+        dst, len, "memmove with destination out of bounds of heap allocation");
+
+    // Assuming `address_cast` yields an unsigned address, the difference is
+    // below `len` only when `dst` lies inside `[src, src + len)`.
+    if ((address_cast(dst) - address_cast(src)) < len)
+    {
+      // slow 'safe' reverse copy, we avoid optimised rollouts
+      // to cope with typical memmove use cases, moving
+      // one element to another address within the same
+      // contiguous space.
+      block_reverse_copy<1>(dst, src, len);
+      return orig_dst;
+    }
+
+    // NOTE(review): a `dst` below `src` whose range overlaps `src` also ends
+    // up here; confirm `Arch::copy` is safe for that forward-overlapping case.
+    Arch::copy(dst, src, len);
+    return orig_dst;
+  }
 } // namespace snmalloc
diff --git a/src/snmalloc/override/memcpy.cc b/src/snmalloc/override/memcpy.cc
index c6053ae0..de26263a 100644
--- a/src/snmalloc/override/memcpy.cc
+++ b/src/snmalloc/override/memcpy.cc
@@ -10,4 +10,13 @@ extern "C"
   {
     return snmalloc::memcpy(dst, src, len);
   }
+
+  /**
+   * Snmalloc checked memmove.
+   */
+  SNMALLOC_EXPORT void*
+  SNMALLOC_NAME_MANGLE(memmove)(void* dst, const void* src, size_t len)
+  {
+    return snmalloc::memmove(dst, src, len);
+  }
 }
diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc
index f435b457..bf04682a 100644
--- a/src/test/func/memcpy/func-memcpy.cc
+++ b/src/test/func/memcpy/func-memcpy.cc
@@ -68,15 +68,24 @@ extern "C" void abort()
  * fills one with a well-known pattern, and then copies subsets of this at
  * one-byte increments to a target. This gives us unaligned starts.
  */
+template
 void check_size(size_t size)
 {
-  START_TEST("checking {}-byte memcpy", size);
+  if constexpr (!overlap)
+  {
+    START_TEST("checking {}-byte memcpy", size);
+  }
+  else
+  {
+    START_TEST("checking {}-byte memmove", size);
+  }
   auto* s = static_cast(my_malloc(size + 1));
   auto* d = static_cast(my_malloc(size + 1));
   d[size] = 0;
   s[size] = 255;
   for (size_t start = 0; start < size; start++)
   {
+    void* ret;
     unsigned char* src = s + start;
     unsigned char* dst = d + start;
     size_t sz = (size - start);
@@ -88,7 +97,14 @@ void check_size(size_t size)
     {
       dst[i] = 0;
     }
-    void* ret = my_memcpy(dst, src, sz);
+    if constexpr (!overlap)
+    {
+      ret = my_memcpy(dst, src, sz);
+    }
+    else
+    {
+      ret = my_memmove(dst, src, sz);
+    }
     EXPECT(ret == dst, "Return value should be {}, was {}", dst, ret);
     for (size_t i = 0; i < sz; ++i)
     {
@@ -147,6 +163,60 @@ void check_bounds(size_t size, size_t out_of_bounds)
   my_free(d);
 }
 
+/**
+ * Moves single elements, and then the whole buffer onto itself, within one
+ * allocation and checks the destination against the source afterwards.
+ */
+void check_overlaps1()
+{
+  size_t size = 16;
+  START_TEST("memmove overlaps1");
+  auto* s = static_cast(my_malloc(size * sizeof(unsigned int)));
+  for (size_t i = 0; i < size; ++i)
+  {
+    s[i] = static_cast(i);
+  }
+  my_memmove(&s[2], &s[4], sizeof(s[0]));
+  EXPECT(s[2] == s[4], "overlap error: {} {}", s[2], s[4]);
+  my_memmove(&s[15], &s[5], sizeof(s[0]));
+  EXPECT(s[15] == s[5], "overlap error: {} {}", s[15], s[5]);
+  auto ptr = s;
+  my_memmove(ptr, s, size * sizeof(s[0]));
+  EXPECT(ptr == s, "overlap error: {} {}", ptr, s);
+  my_free(s);
+}
+
+/**
+ * Moves the first half of a buffer onto the second half (`after`) or the
+ * second half onto the first and checks the moved values.
+ * NOTE(review): with `sz == offset` the two ranges are adjacent rather than
+ * overlapping for the even sizes used by `main`; confirm that is intended.
+ */
+template
+void check_overlaps2(size_t size)
+{
+  START_TEST("memmove overlaps2, size {}", size);
+  auto sz = size / 2;
+  auto offset = size / 2;
+  auto* s = static_cast(my_malloc(size * sizeof(unsigned int)));
+  for (size_t i = 0; i < size; ++i)
+  {
+    s[i] = static_cast(i);
+  }
+  auto dst = after ? s + offset : s;
+  auto src = after ? s : s + offset;
+  size_t i = after ? 0 : offset;
+  size_t u = 0;
+  my_memmove(dst, src, sz * sizeof(unsigned int));
+  while (u < sz)
+  {
+    EXPECT(dst[u] == i, "overlap error: {} {}", dst[u], i);
+    u++;
+    i++;
+  }
+  my_free(s);
+}
+
 int main()
 {
   // Skip the checks that expect bounds checks to fail when we are not the
@@ -175,7 +245,20 @@ int main()
 # endif
   for (size_t x = 0; x < 2048; x++)
   {
-    check_size(x);
+    check_size(x);
+  }
+
+  for (size_t x = 0; x < 2048; x++)
+  {
+    check_size(x);
+  }
+
+  check_overlaps1();
+
+  for (size_t x = 8; x < 256; x += 64)
+  {
+    check_overlaps2(x);
+    check_overlaps2(x);
   }
 }
 #endif