WSL2-Linux-Kernel/lib/xxhash.c

/*
 * xxHash - Extremely Fast Hash algorithm
 * Copyright (C) 2012-2016, Yann Collet.
 *
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following disclaimer
 *     in the documentation and/or other materials provided with the
 *     distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation. This program is dual-licensed; you may select
 * either version 2 of the GNU General Public License ("GPL") or BSD license
 * ("BSD").
 *
 * You can contact the author at:
 * - xxHash homepage: https://cyan4973.github.io/xxHash/
 * - xxHash source repository: https://github.com/Cyan4973/xxHash
 */

#include <asm/unaligned.h>
#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/xxhash.h>

/*-*************************************
 * Macros
 **************************************/
/*
 * Rotate x left by r bits inside a 32-bit (xxh_rotl32) or 64-bit (xxh_rotl64)
 * word.
 *
 * Every use of an argument is parenthesized so that a compound expression
 * (for example "a | b" or "n + 1") is rotated as a single operand instead of
 * being split up by operator precedence.
 *
 * The macros do not cast: x must already be an unsigned value of the matching
 * width. Both x and r are expanded twice, so neither may have side effects,
 * and r must satisfy 0 < r < width so that the complementary shift stays
 * below the width of the type.
 */
#define xxh_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define xxh_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))

/*
 * Compile-time byte-order flag: 1 when the build defines __LITTLE_ENDIAN,
 * 0 otherwise.
 */
#ifndef __LITTLE_ENDIAN
# define XXH_CPU_LITTLE_ENDIAN 0
#else
# define XXH_CPU_LITTLE_ENDIAN 1
#endif

/*-*************************************
 * Constants
 **************************************/
/* 32-bit multiplicative constants used by the xxh32 routines. */
static const uint32_t PRIME32_1 = 0x9E3779B1U;
static const uint32_t PRIME32_2 = 0x85EBCA77U;
static const uint32_t PRIME32_3 = 0xC2B2AE3DU;
static const uint32_t PRIME32_4 = 0x27D4EB2FU;
static const uint32_t PRIME32_5 = 0x165667B1U;

/* 64-bit multiplicative constants used by the xxh64 routines. */
static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;

/*-**************************
 *  Utils
 ***************************/
/*
 * xxh32_copy_state() - duplicate a streaming xxh32 state
 * @dst: state object that receives the copy
 * @src: state object to copy from
 *
 * The whole structure is copied byte for byte with memcpy().
 */
void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
{
	memcpy(dst, src, sizeof(struct xxh32_state));
}
EXPORT_SYMBOL(xxh32_copy_state);

/*
 * xxh64_copy_state() - duplicate a streaming xxh64 state
 * @dst: state object that receives the copy
 * @src: state object to copy from
 *
 * The whole structure is copied byte for byte with memcpy().
 */
void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
{
	memcpy(dst, src, sizeof(struct xxh64_state));
}
EXPORT_SYMBOL(xxh64_copy_state);

/*-***************************
 * Simple Hash Functions
 ****************************/
/*
 * xxh32_round() - fold one 32-bit input word into an accumulator
 * @seed:  current accumulator value
 * @input: word to mix in
 *
 * Adds @input scaled by PRIME32_2, rotates the sum left by 13 bits and
 * multiplies by PRIME32_1. All arithmetic wraps modulo 2^32.
 *
 * Return: the updated accumulator.
 */
static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
{
	const uint32_t mixed = seed + input * PRIME32_2;

	return xxh_rotl32(mixed, 13) * PRIME32_1;
}

/*
 * xxh32() - one-shot 32-bit xxHash of a contiguous buffer
 * @input: first byte of the data to hash
 * @len:   number of bytes at @input
 * @seed:  value mixed into the starting accumulators / starting hash
 *
 * Inputs of at least 16 bytes are first consumed in 16-byte stripes by four
 * accumulators that are then combined. The 0-15 bytes left over are folded
 * in as 32-bit words followed by single bytes, and the result goes through a
 * final xor-shift/multiply mix. Multi-byte loads use get_unaligned_le32().
 *
 * Return: the 32-bit hash value.
 */
uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
{
	const uint8_t *cur = (const uint8_t *)input;
	const uint8_t *end = cur + len;
	uint32_t hash;

	if (len < 16) {
		hash = seed + PRIME32_5;
	} else {
		const uint8_t *const last_stripe = end - 16;
		uint32_t acc1 = seed + PRIME32_1 + PRIME32_2;
		uint32_t acc2 = seed + PRIME32_2;
		uint32_t acc3 = seed;
		uint32_t acc4 = seed - PRIME32_1;

		/* len >= 16 guarantees one full stripe, so test afterwards */
		do {
			acc1 = xxh32_round(acc1, get_unaligned_le32(cur));
			acc2 = xxh32_round(acc2, get_unaligned_le32(cur + 4));
			acc3 = xxh32_round(acc3, get_unaligned_le32(cur + 8));
			acc4 = xxh32_round(acc4, get_unaligned_le32(cur + 12));
			cur += 16;
		} while (cur <= last_stripe);

		hash = xxh_rotl32(acc1, 1) + xxh_rotl32(acc2, 7) +
			xxh_rotl32(acc3, 12) + xxh_rotl32(acc4, 18);
	}

	hash += (uint32_t)len;

	/* whole 32-bit words that did not fill a stripe */
	for (; cur + 4 <= end; cur += 4) {
		hash += get_unaligned_le32(cur) * PRIME32_3;
		hash = xxh_rotl32(hash, 17) * PRIME32_4;
	}

	/* trailing single bytes */
	for (; cur < end; cur++) {
		hash += (*cur) * PRIME32_5;
		hash = xxh_rotl32(hash, 11) * PRIME32_1;
	}

	/* final mix: alternate xor-shifts and multiplications */
	hash ^= hash >> 15;
	hash *= PRIME32_2;
	hash ^= hash >> 13;
	hash *= PRIME32_3;
	hash ^= hash >> 16;

	return hash;
}
EXPORT_SYMBOL(xxh32);

/*
 * xxh64_round() - fold one 64-bit input word into an accumulator
 * @acc:   current accumulator value
 * @input: word to mix in
 *
 * Adds @input scaled by PRIME64_2, rotates the sum left by 31 bits and
 * multiplies by PRIME64_1. All arithmetic wraps modulo 2^64.
 *
 * Return: the updated accumulator.
 */
static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
{
	const uint64_t mixed = acc + input * PRIME64_2;

	return xxh_rotl64(mixed, 31) * PRIME64_1;
}

/*
 * xxh64_merge_round() - merge one stripe accumulator into the running hash
 * @acc: running hash value
 * @val: accumulator to merge
 *
 * @val is passed through xxh64_round() with a zero accumulator, xor-ed into
 * @acc, and the result is multiplied by PRIME64_1 and offset by PRIME64_4.
 *
 * Return: the updated running hash.
 */
static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
{
	const uint64_t folded = acc ^ xxh64_round(0, val);

	return folded * PRIME64_1 + PRIME64_4;
}

/*
 * xxh64() - one-shot 64-bit xxHash of a contiguous buffer
 * @input: first byte of the data to hash
 * @len:   number of bytes at @input
 * @seed:  value mixed into the starting accumulators / starting hash
 *
 * Inputs of at least 32 bytes are first consumed in 32-byte stripes by four
 * accumulators, which are combined and then merged one by one. The 0-31
 * bytes left over are folded in as 64-bit words, at most one 32-bit word and
 * single bytes, and the result goes through a final xor-shift/multiply mix.
 * Multi-byte loads use get_unaligned_le64() / get_unaligned_le32().
 *
 * Return: the 64-bit hash value.
 */
uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
{
	const uint8_t *cur = (const uint8_t *)input;
	const uint8_t *const end = cur + len;
	uint64_t hash;

	if (len < 32) {
		hash = seed + PRIME64_5;
	} else {
		const uint8_t *const last_stripe = end - 32;
		uint64_t acc1 = seed + PRIME64_1 + PRIME64_2;
		uint64_t acc2 = seed + PRIME64_2;
		uint64_t acc3 = seed;
		uint64_t acc4 = seed - PRIME64_1;

		/* len >= 32 guarantees one full stripe, so test afterwards */
		do {
			acc1 = xxh64_round(acc1, get_unaligned_le64(cur));
			acc2 = xxh64_round(acc2, get_unaligned_le64(cur + 8));
			acc3 = xxh64_round(acc3, get_unaligned_le64(cur + 16));
			acc4 = xxh64_round(acc4, get_unaligned_le64(cur + 24));
			cur += 32;
		} while (cur <= last_stripe);

		hash = xxh_rotl64(acc1, 1) + xxh_rotl64(acc2, 7) +
			xxh_rotl64(acc3, 12) + xxh_rotl64(acc4, 18);
		hash = xxh64_merge_round(hash, acc1);
		hash = xxh64_merge_round(hash, acc2);
		hash = xxh64_merge_round(hash, acc3);
		hash = xxh64_merge_round(hash, acc4);
	}

	hash += (uint64_t)len;

	/* whole 64-bit words that did not fill a stripe */
	for (; cur + 8 <= end; cur += 8) {
		hash ^= xxh64_round(0, get_unaligned_le64(cur));
		hash = xxh_rotl64(hash, 27) * PRIME64_1 + PRIME64_4;
	}

	/* at most one 32-bit word can remain after the loop above */
	if (cur + 4 <= end) {
		hash ^= (uint64_t)(get_unaligned_le32(cur)) * PRIME64_1;
		hash = xxh_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
		cur += 4;
	}

	/* trailing single bytes */
	for (; cur < end; cur++) {
		hash ^= (*cur) * PRIME64_5;
		hash = xxh_rotl64(hash, 11) * PRIME64_1;
	}

	/* final mix: alternate xor-shifts and multiplications */
	hash ^= hash >> 33;
	hash *= PRIME64_2;
	hash ^= hash >> 29;
	hash *= PRIME64_3;
	hash ^= hash >> 32;

	return hash;
}
EXPORT_SYMBOL(xxh64);

/*-**************************************************
 * Advanced Hash Functions
 ***************************************************/
void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
{
	/* use a local state for memcpy() to avoid strict-aliasing warnings */
	struct xxh32_state state;

	memset(&state, 0, sizeof(state));
	state.v1 = seed + PRIME32_1 + PRIME32_2;
	state.v2 = seed + PRIME32_2;
	state.v3 = seed + 0;
	state.v4 = seed - PRIME32_1;
	memcpy(statePtr, &state, sizeof(state));
}
EXPORT_SYMBOL(xxh32_reset);

void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
{
	/* use a local state for memcpy() to avoid strict-aliasing warnings */
	struct xxh64_state state;

	memset(&state, 0, sizeof(state));
	state.v1 = seed + PRIME64_1 + PRIME64_2;
	state.v2 = seed + PRIME64_2;
	state.v3 = seed + 0;
	state.v4 = seed - PRIME64_1;
	memcpy(statePtr, &state, sizeof(state));
}
EXPORT_SYMBOL(xxh64_reset);

/*
 * xxh32_update() - feed more data into a streaming xxh32 state
 * @state: state to update
 * @input: data to add; must not be NULL
 * @len:   number of bytes at @input
 *
 * Data is consumed in 16-byte stripes. Bytes that do not complete a stripe
 * are kept in state->mem32 (state->memsize bytes in use) and are combined
 * with the start of the input on the next call.
 *
 * Return: 0 on success, -EINVAL if @input is NULL.
 */
int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
{
	const uint8_t *p = (const uint8_t *)input;
	const uint8_t *b_end;

	if (input == NULL)
		return -EINVAL;

	/* form the end pointer only once input is known to be valid */
	b_end = p + len;

	state->total_len_32 += (uint32_t)len;
	state->large_len |= (len >= 16) | (state->total_len_32 >= 16);

	if (state->memsize + len < 16) { /* fill in tmp buffer */
		memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
		state->memsize += (uint32_t)len;
		return 0;
	}

	if (state->memsize) { /* some data left from previous update */
		const uint32_t *p32 = state->mem32;

		/* top the buffer up to one full stripe and consume it */
		memcpy((uint8_t *)(state->mem32) + state->memsize, input,
			16 - state->memsize);

		state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
		p32++;
		state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
		p32++;
		state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
		p32++;
		state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));

		p += 16 - state->memsize;
		state->memsize = 0;
	}

	/*
	 * p never exceeds b_end here, so compare the remaining length instead
	 * of computing b_end - 16, which could point before the input buffer.
	 */
	if ((size_t)(b_end - p) >= 16) {
		const uint8_t *const limit = b_end - 16;
		uint32_t v1 = state->v1;
		uint32_t v2 = state->v2;
		uint32_t v3 = state->v3;
		uint32_t v4 = state->v4;

		do {
			v1 = xxh32_round(v1, get_unaligned_le32(p));
			p += 4;
			v2 = xxh32_round(v2, get_unaligned_le32(p));
			p += 4;
			v3 = xxh32_round(v3, get_unaligned_le32(p));
			p += 4;
			v4 = xxh32_round(v4, get_unaligned_le32(p));
			p += 4;
		} while (p <= limit);

		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
		state->v4 = v4;
	}

	if (p < b_end) { /* stash the incomplete stripe for the next call */
		memcpy(state->mem32, p, (size_t)(b_end-p));
		state->memsize = (uint32_t)(b_end-p);
	}

	return 0;
}
EXPORT_SYMBOL(xxh32_update);

/*
 * xxh32_digest() - compute the hash of everything fed into a state so far
 * @state: streaming state; it is only read, never modified
 *
 * Combines the four accumulators (or, when state->large_len is clear, starts
 * from state->v3 + PRIME32_5), adds the total length, folds in the bytes
 * still buffered in state->mem32 and applies the final mix.
 *
 * Return: the 32-bit hash value.
 */
uint32_t xxh32_digest(const struct xxh32_state *state)
{
	const uint8_t *cur = (const uint8_t *)state->mem32;
	const uint8_t *const end = cur + state->memsize;
	uint32_t hash;

	if (!state->large_len) {
		hash = state->v3 /* == seed */ + PRIME32_5;
	} else {
		hash = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
			xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
	}

	hash += state->total_len_32;

	/* buffered whole 32-bit words */
	for (; cur + 4 <= end; cur += 4) {
		hash += get_unaligned_le32(cur) * PRIME32_3;
		hash = xxh_rotl32(hash, 17) * PRIME32_4;
	}

	/* buffered trailing bytes */
	for (; cur < end; cur++) {
		hash += (*cur) * PRIME32_5;
		hash = xxh_rotl32(hash, 11) * PRIME32_1;
	}

	/* final mix: alternate xor-shifts and multiplications */
	hash ^= hash >> 15;
	hash *= PRIME32_2;
	hash ^= hash >> 13;
	hash *= PRIME32_3;
	hash ^= hash >> 16;

	return hash;
}
EXPORT_SYMBOL(xxh32_digest);

/*
 * xxh64_update() - feed more data into a streaming xxh64 state
 * @state: state to update
 * @input: data to add; must not be NULL
 * @len:   number of bytes at @input
 *
 * Data is consumed in 32-byte stripes. Bytes that do not complete a stripe
 * are kept in state->mem64 (state->memsize bytes in use) and are combined
 * with the start of the input on the next call.
 *
 * Return: 0 on success, -EINVAL if @input is NULL.
 */
int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
{
	const uint8_t *p = (const uint8_t *)input;
	const uint8_t *b_end;

	if (input == NULL)
		return -EINVAL;

	/* form the end pointer only once input is known to be valid */
	b_end = p + len;

	state->total_len += len;

	if (state->memsize + len < 32) { /* fill in tmp buffer */
		memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
		state->memsize += (uint32_t)len;
		return 0;
	}

	if (state->memsize) { /* some data left from previous update */
		uint64_t *p64 = state->mem64;

		/* top the buffer up to one full stripe and consume it */
		memcpy(((uint8_t *)p64) + state->memsize, input,
			32 - state->memsize);

		state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
		p64++;
		state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
		p64++;
		state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
		p64++;
		state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));

		p += 32 - state->memsize;
		state->memsize = 0;
	}

	/*
	 * p never exceeds b_end here, so compare the remaining length instead
	 * of computing p + 32, which could point past the end of the input.
	 */
	if ((size_t)(b_end - p) >= 32) {
		const uint8_t *const limit = b_end - 32;
		uint64_t v1 = state->v1;
		uint64_t v2 = state->v2;
		uint64_t v3 = state->v3;
		uint64_t v4 = state->v4;

		do {
			v1 = xxh64_round(v1, get_unaligned_le64(p));
			p += 8;
			v2 = xxh64_round(v2, get_unaligned_le64(p));
			p += 8;
			v3 = xxh64_round(v3, get_unaligned_le64(p));
			p += 8;
			v4 = xxh64_round(v4, get_unaligned_le64(p));
			p += 8;
		} while (p <= limit);

		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
		state->v4 = v4;
	}

	if (p < b_end) { /* stash the incomplete stripe for the next call */
		memcpy(state->mem64, p, (size_t)(b_end-p));
		state->memsize = (uint32_t)(b_end - p);
	}

	return 0;
}
EXPORT_SYMBOL(xxh64_update);

/*
 * xxh64_digest() - compute the hash of everything fed into a state so far
 * @state: streaming state; it is only read, never modified
 *
 * When at least 32 bytes have been fed in, the four accumulators are
 * combined and merged; otherwise the hash starts from state->v3 + PRIME64_5.
 * The total length is then added, the bytes still buffered in state->mem64
 * are folded in and the final mix is applied.
 *
 * Return: the 64-bit hash value.
 */
uint64_t xxh64_digest(const struct xxh64_state *state)
{
	const uint8_t *cur = (const uint8_t *)state->mem64;
	const uint8_t *const end = cur + state->memsize;
	uint64_t hash;

	if (state->total_len < 32) {
		hash = state->v3 + PRIME64_5;
	} else {
		const uint64_t acc1 = state->v1;
		const uint64_t acc2 = state->v2;
		const uint64_t acc3 = state->v3;
		const uint64_t acc4 = state->v4;

		hash = xxh_rotl64(acc1, 1) + xxh_rotl64(acc2, 7) +
			xxh_rotl64(acc3, 12) + xxh_rotl64(acc4, 18);
		hash = xxh64_merge_round(hash, acc1);
		hash = xxh64_merge_round(hash, acc2);
		hash = xxh64_merge_round(hash, acc3);
		hash = xxh64_merge_round(hash, acc4);
	}

	hash += (uint64_t)state->total_len;

	/* buffered whole 64-bit words */
	for (; cur + 8 <= end; cur += 8) {
		hash ^= xxh64_round(0, get_unaligned_le64(cur));
		hash = xxh_rotl64(hash, 27) * PRIME64_1 + PRIME64_4;
	}

	/* at most one 32-bit word can remain after the loop above */
	if (cur + 4 <= end) {
		hash ^= (uint64_t)(get_unaligned_le32(cur)) * PRIME64_1;
		hash = xxh_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
		cur += 4;
	}

	/* buffered trailing bytes */
	for (; cur < end; cur++) {
		hash ^= (*cur) * PRIME64_5;
		hash = xxh_rotl64(hash, 11) * PRIME64_1;
	}

	/* final mix: alternate xor-shifts and multiplications */
	hash ^= hash >> 33;
	hash *= PRIME64_2;
	hash ^= hash >> 29;
	hash *= PRIME64_3;
	hash ^= hash >> 32;

	return hash;
}
EXPORT_SYMBOL(xxh64_digest);

/* Module metadata: licence string and short description. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("xxHash");
lib: Add xxhash module Adds xxhash kernel module with xxh32 and xxh64 hashes. xxhash is an extremely fast non-cryptographic hash algorithm for checksumming. The zstd compression and decompression modules added in the next patch require xxhash. I extracted it out from zstd since it is useful on its own. I copied the code from the upstream XXHash source repository and translated it into kernel style. I ran benchmarks and tests in the kernel and tests in userland. I benchmarked xxhash as a special character device. I ran in four modes, no-op, xxh32, xxh64, and crc32. The no-op mode simply copies the data to kernel space and ignores it. The xxh32, xxh64, and crc32 modes compute hashes on the copied data. I also ran it with four different buffer sizes. The benchmark file is located in the upstream zstd source repository under `contrib/linux-kernel/xxhash_test.c` [1]. I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and an SSD. I benchmarked using the file `filesystem.squashfs` from `ubuntu-16.10-desktop-amd64.iso`, which is 1,536,217,088 B large. Run the following commands for the benchmark: modprobe xxhash_test mknod xxhash_test c 245 0 time cp filesystem.squashfs xxhash_test The time is reported by the time of the userland `cp`. The GB/s is computed with 1,536,217,088 B / time(buffer size, hash) which includes the time to copy from userland. The Adjusted GB/s is computed with 1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)). 
\| Buffer Size (B) \| Hash \| Time (s) \| GB/s \| Adjusted GB/s \| \|-----------------\|-------\|----------\|------\|---------------\| \| 1024 \| none \| 0.408 \| 3.77 \| - \| \| 1024 \| xxh32 \| 0.649 \| 2.37 \| 6.37 \| \| 1024 \| xxh64 \| 0.542 \| 2.83 \| 11.46 \| \| 1024 \| crc32 \| 1.290 \| 1.19 \| 1.74 \| \| 4096 \| none \| 0.380 \| 4.04 \| - \| \| 4096 \| xxh32 \| 0.645 \| 2.38 \| 5.79 \| \| 4096 \| xxh64 \| 0.500 \| 3.07 \| 12.80 \| \| 4096 \| crc32 \| 1.168 \| 1.32 \| 1.95 \| \| 8192 \| none \| 0.351 \| 4.38 \| - \| \| 8192 \| xxh32 \| 0.614 \| 2.50 \| 5.84 \| \| 8192 \| xxh64 \| 0.464 \| 3.31 \| 13.60 \| \| 8192 \| crc32 \| 1.163 \| 1.32 \| 1.89 \| \| 16384 \| none \| 0.346 \| 4.43 \| - \| \| 16384 \| xxh32 \| 0.590 \| 2.60 \| 6.30 \| \| 16384 \| xxh64 \| 0.466 \| 3.30 \| 12.80 \| \| 16384 \| crc32 \| 1.183 \| 1.30 \| 1.84 \| Tested in userland using the test-suite in the zstd repo under `contrib/linux-kernel/test/XXHashUserlandTest.cpp` [2] by mocking the kernel functions. A line in each branch of every function in `xxhash.c` was commented out to ensure that the test-suite fails. Additionally tested while testing zstd and with SMHasher [3]. [1] https://phabricator.intern.facebook.com/P57526246 [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/XXHashUserlandTest.cpp [3] https://github.com/aappleby/smhasher zstd source repository: https://github.com/facebook/zstd XXHash source repository: https://github.com/cyan4973/xxhash Signed-off-by: Nick Terrell <terrelln@fb.com> Signed-off-by: Chris Mason <clm@fb.com> 2017-08-04 23:19:17 +03:00			`/*`
			`* xxHash - Extremely Fast Hash algorithm`
			`* Copyright (C) 2012-2016, Yann Collet.`
			`*`
			`* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions are`
			`* met:`
			`*`
			`* * Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* * Redistributions in binary form must reproduce the above`
			`* copyright notice, this list of conditions and the following disclaimer`
			`* in the documentation and/or other materials provided with the`
			`* distribution.`
			`*`
			`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*`
			`* This program is free software; you can redistribute it and/or modify it under`
			`* the terms of the GNU General Public License version 2 as published by the`
			`* Free Software Foundation. This program is dual-licensed; you may select`
			`* either version 2 of the GNU General Public License ("GPL") or BSD license`
			`* ("BSD").`
			`*`
			`* You can contact the author at:`
lib/: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Coly Li <colyli@suse.de> [crc64.c] Link: http://lkml.kernel.org/r/20200726112154.16510-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2020-08-12 04:34:50 +03:00			`* - xxHash homepage: https://cyan4973.github.io/xxHash/`
lib: Add xxhash module Adds xxhash kernel module with xxh32 and xxh64 hashes. xxhash is an extremely fast non-cryptographic hash algorithm for checksumming. The zstd compression and decompression modules added in the next patch require xxhash. I extracted it out from zstd since it is useful on its own. I copied the code from the upstream XXHash source repository and translated it into kernel style. I ran benchmarks and tests in the kernel and tests in userland. I benchmarked xxhash as a special character device. I ran in four modes, no-op, xxh32, xxh64, and crc32. The no-op mode simply copies the data to kernel space and ignores it. The xxh32, xxh64, and crc32 modes compute hashes on the copied data. I also ran it with four different buffer sizes. The benchmark file is located in the upstream zstd source repository under `contrib/linux-kernel/xxhash_test.c` [1]. I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and an SSD. I benchmarked using the file `filesystem.squashfs` from `ubuntu-16.10-desktop-amd64.iso`, which is 1,536,217,088 B large. Run the following commands for the benchmark: modprobe xxhash_test mknod xxhash_test c 245 0 time cp filesystem.squashfs xxhash_test The time is reported by the time of the userland `cp`. The GB/s is computed with 1,536,217,088 B / time(buffer size, hash) which includes the time to copy from userland. The Adjusted GB/s is computed with 1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)). 
\| Buffer Size (B) \| Hash \| Time (s) \| GB/s \| Adjusted GB/s \| \|-----------------\|-------\|----------\|------\|---------------\| \| 1024 \| none \| 0.408 \| 3.77 \| - \| \| 1024 \| xxh32 \| 0.649 \| 2.37 \| 6.37 \| \| 1024 \| xxh64 \| 0.542 \| 2.83 \| 11.46 \| \| 1024 \| crc32 \| 1.290 \| 1.19 \| 1.74 \| \| 4096 \| none \| 0.380 \| 4.04 \| - \| \| 4096 \| xxh32 \| 0.645 \| 2.38 \| 5.79 \| \| 4096 \| xxh64 \| 0.500 \| 3.07 \| 12.80 \| \| 4096 \| crc32 \| 1.168 \| 1.32 \| 1.95 \| \| 8192 \| none \| 0.351 \| 4.38 \| - \| \| 8192 \| xxh32 \| 0.614 \| 2.50 \| 5.84 \| \| 8192 \| xxh64 \| 0.464 \| 3.31 \| 13.60 \| \| 8192 \| crc32 \| 1.163 \| 1.32 \| 1.89 \| \| 16384 \| none \| 0.346 \| 4.43 \| - \| \| 16384 \| xxh32 \| 0.590 \| 2.60 \| 6.30 \| \| 16384 \| xxh64 \| 0.466 \| 3.30 \| 12.80 \| \| 16384 \| crc32 \| 1.183 \| 1.30 \| 1.84 \| Tested in userland using the test-suite in the zstd repo under `contrib/linux-kernel/test/XXHashUserlandTest.cpp` [2] by mocking the kernel functions. A line in each branch of every function in `xxhash.c` was commented out to ensure that the test-suite fails. Additionally tested while testing zstd and with SMHasher [3]. [1] https://phabricator.intern.facebook.com/P57526246 [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/XXHashUserlandTest.cpp [3] https://github.com/aappleby/smhasher zstd source repository: https://github.com/facebook/zstd XXHash source repository: https://github.com/cyan4973/xxhash Signed-off-by: Nick Terrell <terrelln@fb.com> Signed-off-by: Chris Mason <clm@fb.com> 2017-08-04 23:19:17 +03:00			`* - xxHash source repository: https://github.com/Cyan4973/xxHash`
			`*/`

			`#include <asm/unaligned.h>`
			`#include <linux/errno.h>`
			`#include <linux/compiler.h>`
			`#include <linux/kernel.h>`
			`#include <linux/module.h>`
			`#include <linux/string.h>`
			`#include <linux/xxhash.h>`

			`/-************************************`
			`* Macros`
			`**************************************/`
			`#define xxh_rotl32(x, r) ((x << r) \| (x >> (32 - r)))`
			`#define xxh_rotl64(x, r) ((x << r) \| (x >> (64 - r)))`

			`#ifdef __LITTLE_ENDIAN`
			`# define XXH_CPU_LITTLE_ENDIAN 1`
			`#else`
			`# define XXH_CPU_LITTLE_ENDIAN 0`
			`#endif`

			`/-************************************`
			`* Constants`
			`**************************************/`
			`static const uint32_t PRIME32_1 = 2654435761U;`
			`static const uint32_t PRIME32_2 = 2246822519U;`
			`static const uint32_t PRIME32_3 = 3266489917U;`
			`static const uint32_t PRIME32_4 = 668265263U;`
			`static const uint32_t PRIME32_5 = 374761393U;`

			`static const uint64_t PRIME64_1 = 11400714785074694791ULL;`
			`static const uint64_t PRIME64_2 = 14029467366897019727ULL;`
			`static const uint64_t PRIME64_3 = 1609587929392839161ULL;`
			`static const uint64_t PRIME64_4 = 9650029242287828579ULL;`
			`static const uint64_t PRIME64_5 = 2870177450012600261ULL;`

			`/-*************************`
			`* Utils`
			`***************************/`
			`void xxh32_copy_state(struct xxh32_state dst, const struct xxh32_state src)`
			`{`
			`memcpy(dst, src, sizeof(*dst));`
			`}`
			`EXPORT_SYMBOL(xxh32_copy_state);`

			`void xxh64_copy_state(struct xxh64_state dst, const struct xxh64_state src)`
			`{`
			`memcpy(dst, src, sizeof(*dst));`
			`}`
			`EXPORT_SYMBOL(xxh64_copy_state);`

			`/-**************************`
			`* Simple Hash Functions`
			`****************************/`
			`static uint32_t xxh32_round(uint32_t seed, const uint32_t input)`
			`{`
			`seed += input * PRIME32_2;`
			`seed = xxh_rotl32(seed, 13);`
			`seed *= PRIME32_1;`
			`return seed;`
			`}`

			`uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)`
			`{`
			`const uint8_t p = (const uint8_t )input;`
			`const uint8_t *b_end = p + len;`
			`uint32_t h32;`

			`if (len >= 16) {`
			`const uint8_t *const limit = b_end - 16;`
			`uint32_t v1 = seed + PRIME32_1 + PRIME32_2;`
			`uint32_t v2 = seed + PRIME32_2;`
			`uint32_t v3 = seed + 0;`
			`uint32_t v4 = seed - PRIME32_1;`

			`do {`
			`v1 = xxh32_round(v1, get_unaligned_le32(p));`
			`p += 4;`
			`v2 = xxh32_round(v2, get_unaligned_le32(p));`
			`p += 4;`
			`v3 = xxh32_round(v3, get_unaligned_le32(p));`
			`p += 4;`
			`v4 = xxh32_round(v4, get_unaligned_le32(p));`
			`p += 4;`
			`} while (p <= limit);`

			`h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +`
			`xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);`
			`} else {`
			`h32 = seed + PRIME32_5;`
			`}`

			`h32 += (uint32_t)len;`

			`while (p + 4 <= b_end) {`
			`h32 += get_unaligned_le32(p) * PRIME32_3;`
			`h32 = xxh_rotl32(h32, 17) * PRIME32_4;`
			`p += 4;`
			`}`

			`while (p < b_end) {`
			`h32 += (p) PRIME32_5;`
			`h32 = xxh_rotl32(h32, 11) * PRIME32_1;`
			`p++;`
			`}`

			`h32 ^= h32 >> 15;`
			`h32 *= PRIME32_2;`
			`h32 ^= h32 >> 13;`
			`h32 *= PRIME32_3;`
			`h32 ^= h32 >> 16;`

			`return h32;`
			`}`
			`EXPORT_SYMBOL(xxh32);`

			`static uint64_t xxh64_round(uint64_t acc, const uint64_t input)`
			`{`
			`acc += input * PRIME64_2;`
			`acc = xxh_rotl64(acc, 31);`
			`acc *= PRIME64_1;`
			`return acc;`
			`}`

			`static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)`
			`{`
			`val = xxh64_round(0, val);`
			`acc ^= val;`
			`acc = acc * PRIME64_1 + PRIME64_4;`
			`return acc;`
			`}`

			`uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)`
			`{`
			`const uint8_t p = (const uint8_t )input;`
			`const uint8_t *const b_end = p + len;`
			`uint64_t h64;`

			`if (len >= 32) {`
			`const uint8_t *const limit = b_end - 32;`
			`uint64_t v1 = seed + PRIME64_1 + PRIME64_2;`
			`uint64_t v2 = seed + PRIME64_2;`
			`uint64_t v3 = seed + 0;`
			`uint64_t v4 = seed - PRIME64_1;`

			`do {`
			`v1 = xxh64_round(v1, get_unaligned_le64(p));`
			`p += 8;`
			`v2 = xxh64_round(v2, get_unaligned_le64(p));`
			`p += 8;`
			`v3 = xxh64_round(v3, get_unaligned_le64(p));`
			`p += 8;`
			`v4 = xxh64_round(v4, get_unaligned_le64(p));`
			`p += 8;`
			`} while (p <= limit);`

			`h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +`
			`xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);`
			`h64 = xxh64_merge_round(h64, v1);`
			`h64 = xxh64_merge_round(h64, v2);`
			`h64 = xxh64_merge_round(h64, v3);`
			`h64 = xxh64_merge_round(h64, v4);`

			`} else {`
			`h64 = seed + PRIME64_5;`
			`}`

			`h64 += (uint64_t)len;`

			`while (p + 8 <= b_end) {`
			`const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));`

			`h64 ^= k1;`
			`h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;`
			`p += 8;`
			`}`

			`if (p + 4 <= b_end) {`
			`h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;`
			`h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;`
			`p += 4;`
			`}`

			`while (p < b_end) {`
			`h64 ^= (p) PRIME64_5;`
			`h64 = xxh_rotl64(h64, 11) * PRIME64_1;`
			`p++;`
			`}`

			`h64 ^= h64 >> 33;`
			`h64 *= PRIME64_2;`
			`h64 ^= h64 >> 29;`
			`h64 *= PRIME64_3;`
			`h64 ^= h64 >> 32;`

			`return h64;`
			`}`
			`EXPORT_SYMBOL(xxh64);`

			`/-*************************************************`
			`* Advanced Hash Functions`
			`***************************************************/`
			`void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)`
			`{`
			`/* use a local state for memcpy() to avoid strict-aliasing warnings */`
			`struct xxh32_state state;`

			`memset(&state, 0, sizeof(state));`
			`state.v1 = seed + PRIME32_1 + PRIME32_2;`
			`state.v2 = seed + PRIME32_2;`
			`state.v3 = seed + 0;`
			`state.v4 = seed - PRIME32_1;`
			`memcpy(statePtr, &state, sizeof(state));`
			`}`
			`EXPORT_SYMBOL(xxh32_reset);`

			`void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)`
			`{`
			`/* use a local state for memcpy() to avoid strict-aliasing warnings */`
			`struct xxh64_state state;`

			`memset(&state, 0, sizeof(state));`
			`state.v1 = seed + PRIME64_1 + PRIME64_2;`
			`state.v2 = seed + PRIME64_2;`
			`state.v3 = seed + 0;`
			`state.v4 = seed - PRIME64_1;`
			`memcpy(statePtr, &state, sizeof(state));`
			`}`
			`EXPORT_SYMBOL(xxh64_reset);`

			`int xxh32_update(struct xxh32_state state, const void input, const size_t len)`
			`{`
			`const uint8_t p = (const uint8_t )input;`
			`const uint8_t *const b_end = p + len;`

			`if (input == NULL)`
			`return -EINVAL;`

			`state->total_len_32 += (uint32_t)len;`
			`state->large_len \|= (len >= 16) \| (state->total_len_32 >= 16);`

			`if (state->memsize + len < 16) { /* fill in tmp buffer */`
			`memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);`
			`state->memsize += (uint32_t)len;`
			`return 0;`
			`}`

			`if (state->memsize) { /* some data left from previous update */`
			`const uint32_t *p32 = state->mem32;`

			`memcpy((uint8_t *)(state->mem32) + state->memsize, input,`
			`16 - state->memsize);`

			`state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));`
			`p32++;`
			`state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));`
			`p32++;`
			`state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));`
			`p32++;`
			`state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));`
			`p32++;`

			`p += 16-state->memsize;`
			`state->memsize = 0;`
			`}`

			`if (p <= b_end - 16) {`
			`const uint8_t *const limit = b_end - 16;`
			`uint32_t v1 = state->v1;`
			`uint32_t v2 = state->v2;`
			`uint32_t v3 = state->v3;`
			`uint32_t v4 = state->v4;`

			`do {`
			`v1 = xxh32_round(v1, get_unaligned_le32(p));`
			`p += 4;`
			`v2 = xxh32_round(v2, get_unaligned_le32(p));`
			`p += 4;`
			`v3 = xxh32_round(v3, get_unaligned_le32(p));`
			`p += 4;`
			`v4 = xxh32_round(v4, get_unaligned_le32(p));`
			`p += 4;`
			`} while (p <= limit);`

			`state->v1 = v1;`
			`state->v2 = v2;`
			`state->v3 = v3;`
			`state->v4 = v4;`
			`}`

			`if (p < b_end) {`
			`memcpy(state->mem32, p, (size_t)(b_end-p));`
			`state->memsize = (uint32_t)(b_end-p);`
			`}`

			`return 0;`
			`}`
			`EXPORT_SYMBOL(xxh32_update);`

			`uint32_t xxh32_digest(const struct xxh32_state *state)`
			`{`
			`const uint8_t p = (const uint8_t )state->mem32;`
			`const uint8_t const b_end = (const uint8_t )(state->mem32) +`
			`state->memsize;`
			`uint32_t h32;`

			`if (state->large_len) {`
			`h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +`
			`xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);`
			`} else {`
			`h32 = state->v3 /* == seed */ + PRIME32_5;`
			`}`

			`h32 += state->total_len_32;`

			`while (p + 4 <= b_end) {`
			`h32 += get_unaligned_le32(p) * PRIME32_3;`
			`h32 = xxh_rotl32(h32, 17) * PRIME32_4;`
			`p += 4;`
			`}`

			`while (p < b_end) {`
			`h32 += (p) PRIME32_5;`
			`h32 = xxh_rotl32(h32, 11) * PRIME32_1;`
			`p++;`
			`}`

			`h32 ^= h32 >> 15;`
			`h32 *= PRIME32_2;`
			`h32 ^= h32 >> 13;`
			`h32 *= PRIME32_3;`
			`h32 ^= h32 >> 16;`

			`return h32;`
			`}`
			`EXPORT_SYMBOL(xxh32_digest);`

			`int xxh64_update(struct xxh64_state state, const void input, const size_t len)`
			`{`
			`const uint8_t p = (const uint8_t )input;`
			`const uint8_t *const b_end = p + len;`

			`if (input == NULL)`
			`return -EINVAL;`

			`state->total_len += len;`

			`if (state->memsize + len < 32) { /* fill in tmp buffer */`
			`memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);`
			`state->memsize += (uint32_t)len;`
			`return 0;`
			`}`

			`if (state->memsize) { /* tmp buffer is full */`
			`uint64_t *p64 = state->mem64;`

			`memcpy(((uint8_t *)p64) + state->memsize, input,`
			`32 - state->memsize);`

			`state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));`
			`p64++;`
			`state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));`
			`p64++;`
			`state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));`
			`p64++;`
			`state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));`

			`p += 32 - state->memsize;`
			`state->memsize = 0;`
			`}`

			`if (p + 32 <= b_end) {`
			`const uint8_t *const limit = b_end - 32;`
			`uint64_t v1 = state->v1;`
			`uint64_t v2 = state->v2;`
			`uint64_t v3 = state->v3;`
			`uint64_t v4 = state->v4;`

			`do {`
			`v1 = xxh64_round(v1, get_unaligned_le64(p));`
			`p += 8;`
			`v2 = xxh64_round(v2, get_unaligned_le64(p));`
			`p += 8;`
			`v3 = xxh64_round(v3, get_unaligned_le64(p));`
			`p += 8;`
			`v4 = xxh64_round(v4, get_unaligned_le64(p));`
			`p += 8;`
			`} while (p <= limit);`

			`state->v1 = v1;`
			`state->v2 = v2;`
			`state->v3 = v3;`
			`state->v4 = v4;`
			`}`

			`if (p < b_end) {`
			`memcpy(state->mem64, p, (size_t)(b_end-p));`
			`state->memsize = (uint32_t)(b_end - p);`
			`}`

			`return 0;`
			`}`
			`EXPORT_SYMBOL(xxh64_update);`

			`uint64_t xxh64_digest(const struct xxh64_state *state)`
			`{`
			`const uint8_t p = (const uint8_t )state->mem64;`
			`const uint8_t const b_end = (const uint8_t )state->mem64 +`
			`state->memsize;`
			`uint64_t h64;`

			`if (state->total_len >= 32) {`
			`const uint64_t v1 = state->v1;`
			`const uint64_t v2 = state->v2;`
			`const uint64_t v3 = state->v3;`
			`const uint64_t v4 = state->v4;`

			`h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +`
			`xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);`
			`h64 = xxh64_merge_round(h64, v1);`
			`h64 = xxh64_merge_round(h64, v2);`
			`h64 = xxh64_merge_round(h64, v3);`
			`h64 = xxh64_merge_round(h64, v4);`
			`} else {`
			`h64 = state->v3 + PRIME64_5;`
			`}`

			`h64 += (uint64_t)state->total_len;`

			`while (p + 8 <= b_end) {`
			`const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));`

			`h64 ^= k1;`
			`h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;`
			`p += 8;`
			`}`

			`if (p + 4 <= b_end) {`
			`h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;`
			`h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;`
			`p += 4;`
			`}`

			`while (p < b_end) {`
			`h64 ^= (p) PRIME64_5;`
			`h64 = xxh_rotl64(h64, 11) * PRIME64_1;`
			`p++;`
			`}`

			`h64 ^= h64 >> 33;`
			`h64 *= PRIME64_2;`
			`h64 ^= h64 >> 29;`
			`h64 *= PRIME64_3;`
			`h64 ^= h64 >> 32;`

			`return h64;`
			`}`
			`EXPORT_SYMBOL(xxh64_digest);`

			`MODULE_LICENSE("Dual BSD/GPL");`
			`MODULE_DESCRIPTION("xxHash");`