WSL2-Linux-Kernel/lib/raid6/mmx.c

/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/mmx.c
 *
 * MMX implementation of RAID-6 syndrome functions
 */

#ifdef CONFIG_X86_32

#include <linux/raid/pq.h>
#include "x86.h"

/* Shared with raid6/sse1.c */
const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants = {
	0x1d1d1d1d1d1d1d1dULL,
};

static int raid6_have_mmx(void)
{
	/* Not really "boot_cpu" but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX);
}

/*
 * Plain MMX implementation
 */
static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
		}
		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
		asm volatile("pxor %mm2,%mm2");
		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
		asm volatile("pxor %mm4,%mm4");
	}

	kernel_fpu_end();
}

const struct raid6_calls raid6_mmxx1 = {
	raid6_mmx1_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_mmx,
	"mmxx1",
	0
};

/*
 * Unrolled-by-2 MMX implementation
 */
static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
		asm volatile("movq %mm2,%mm4"); /* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
	}

	kernel_fpu_end();
}

const struct raid6_calls raid6_mmxx2 = {
	raid6_mmx2_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_mmx,
	"mmxx2",
	0
};

#endif
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/* -- linux-c -- ------------------------------------------------------- *`
			`*`
			`* Copyright 2002 H. Peter Anvin - All Rights Reserved`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, Inc., 53 Temple Place Ste 330,`
md: fix typo in FSF address Hello, I found a typo Bosto"m" in FSF address. And I am checking around linux source code. Here is the only place which uses Bosto"m" (not Boston). Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-31 07:57:37 +04:00			`* Boston MA 02111-1307, USA; either version 2 of the License, or`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`* (at your option) any later version; incorporated herein by reference.`
			`*`
			`* ----------------------------------------------------------------------- */`

			`/*`
Further tidyup of raid6 naming in lib/raid6 Rename raid6/raid6x86.h to raid6/x86.h and modify some comments. Signed-off-by: NeilBrown <neilb@suse.de> 2010-08-12 00:44:54 +04:00			`* raid6/mmx.c`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`*`
			`* MMX implementation of RAID-6 syndrome functions`
			`*/`

lib/raid6: build proper files on corresponding arch sse and avx2 stuff only exist on x86 arch, and we don't need to build altivec on x86. And we can do that at lib/raid6/Makefile. Proposed-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com> Reviewed-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com> Signed-off-by: NeilBrown <neilb@suse.de> 2012-12-01 01:10:40 +04:00			`#ifdef CONFIG_X86_32`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
md/raid6: move raid6 data processing to raid6_pq.ko Move the raid6 data processing routines into a standalone module (raid6_pq) to prepare them to be called from async_tx wrappers and other non-md drivers/modules. This precludes a circular dependency of raid456 needing the async modules for data processing while those modules in turn depend on raid456 for the base level synchronous raid6 routines. To support this move: 1/ The exportable definitions in raid6.h move to include/linux/raid/pq.h 2/ The raid6_call, recovery calls, and table symbols are exported 3/ Extra #ifdef __KERNEL__ statements to enable the userspace raid6test to compile Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-31 08:09:39 +04:00			`#include <linux/raid/pq.h>`
Further tidyup of raid6 naming in lib/raid6 Rename raid6/raid6x86.h to raid6/x86.h and modify some comments. Signed-off-by: NeilBrown <neilb@suse.de> 2010-08-12 00:44:54 +04:00			`#include "x86.h"`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
Further tidyup of raid6 naming in lib/raid6 Rename raid6/raid6x86.h to raid6/x86.h and modify some comments. Signed-off-by: NeilBrown <neilb@suse.de> 2010-08-12 00:44:54 +04:00			`/* Shared with raid6/sse1.c */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`const struct raid6_mmx_constants {`
			`u64 x1d;`
			`} raid6_mmx_constants = {`
			`0x1d1d1d1d1d1d1d1dULL,`
			`};`

			`static int raid6_have_mmx(void)`
			`{`
			`/* Not really "boot_cpu" but "all_cpus" */`
			`return boot_cpu_has(X86_FEATURE_MMX);`
			`}`

			`/*`
			`* Plain MMX implementation`
			`*/`
			`static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)`
			`{`
			`u8 dptr = (u8 )ptrs;`
			`u8 p, q;`
			`int d, z, z0;`

			`z0 = disks - 3; /* Highest data disk */`
			`p = dptr[z0+1]; /* XOR parity */`
			`q = dptr[z0+2]; /* RS syndrome */`

[PATCH] md: RAID6: clean up CPUID and FPU enter/exit code - Use kernel_fpu_begin() and kernel_fpu_end() - Use boot_cpu_has() for feature testing even in userspace Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2007-03-01 07:11:25 +03:00			`kernel_fpu_begin();`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
			`asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));`
			`asm volatile("pxor %mm5,%mm5"); /* Zero temp */`

			`for ( d = 0 ; d < bytes ; d += 8 ) {`
			`asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */`
			`asm volatile("movq %mm2,%mm4"); /* Q[0] */`
			`for ( z = z0-1 ; z >= 0 ; z-- ) {`
			`asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));`
			`asm volatile("pcmpgtb %mm4,%mm5");`
			`asm volatile("paddb %mm4,%mm4");`
			`asm volatile("pand %mm0,%mm5");`
			`asm volatile("pxor %mm5,%mm4");`
			`asm volatile("pxor %mm5,%mm5");`
			`asm volatile("pxor %mm6,%mm2");`
			`asm volatile("pxor %mm6,%mm4");`
			`}`
			`asm volatile("movq %%mm2,%0" : "=m" (p[d]));`
			`asm volatile("pxor %mm2,%mm2");`
			`asm volatile("movq %%mm4,%0" : "=m" (q[d]));`
			`asm volatile("pxor %mm4,%mm4");`
			`}`

[PATCH] md: RAID6: clean up CPUID and FPU enter/exit code - Use kernel_fpu_begin() and kernel_fpu_end() - Use boot_cpu_has() for feature testing even in userspace Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2007-03-01 07:11:25 +03:00			`kernel_fpu_end();`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`}`

			`const struct raid6_calls raid6_mmxx1 = {`
			`raid6_mmx1_gen_syndrome,`
md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and end. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and end. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still need calulation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give an rough estimate how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. To be not to enthusiatic about the expected speeds we will run a worse case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen <stockhausen@collogia.de> Signed-off-by: NeilBrown <neilb@suse.de> 2014-12-15 04:57:04 +03:00			`NULL, /* XOR not yet implemented */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`raid6_have_mmx,`
			`"mmxx1",`
			`0`
			`};`

			`/*`
			`* Unrolled-by-2 MMX implementation`
			`*/`
			`static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)`
			`{`
			`u8 dptr = (u8 )ptrs;`
			`u8 p, q;`
			`int d, z, z0;`

			`z0 = disks - 3; /* Highest data disk */`
			`p = dptr[z0+1]; /* XOR parity */`
			`q = dptr[z0+2]; /* RS syndrome */`

[PATCH] md: RAID6: clean up CPUID and FPU enter/exit code - Use kernel_fpu_begin() and kernel_fpu_end() - Use boot_cpu_has() for feature testing even in userspace Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2007-03-01 07:11:25 +03:00			`kernel_fpu_begin();`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
			`asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));`
			`asm volatile("pxor %mm5,%mm5"); /* Zero temp */`
			`asm volatile("pxor %mm7,%mm7"); /* Zero temp */`

			`for ( d = 0 ; d < bytes ; d += 16 ) {`
			`asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */`
			`asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));`
			`asm volatile("movq %mm2,%mm4"); /* Q[0] */`
			`asm volatile("movq %mm3,%mm6"); /* Q[1] */`
			`for ( z = z0-1 ; z >= 0 ; z-- ) {`
			`asm volatile("pcmpgtb %mm4,%mm5");`
			`asm volatile("pcmpgtb %mm6,%mm7");`
			`asm volatile("paddb %mm4,%mm4");`
			`asm volatile("paddb %mm6,%mm6");`
			`asm volatile("pand %mm0,%mm5");`
			`asm volatile("pand %mm0,%mm7");`
			`asm volatile("pxor %mm5,%mm4");`
			`asm volatile("pxor %mm7,%mm6");`
			`asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));`
			`asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));`
			`asm volatile("pxor %mm5,%mm2");`
			`asm volatile("pxor %mm7,%mm3");`
			`asm volatile("pxor %mm5,%mm4");`
			`asm volatile("pxor %mm7,%mm6");`
			`asm volatile("pxor %mm5,%mm5");`
			`asm volatile("pxor %mm7,%mm7");`
			`}`
			`asm volatile("movq %%mm2,%0" : "=m" (p[d]));`
			`asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));`
			`asm volatile("movq %%mm4,%0" : "=m" (q[d]));`
			`asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));`
			`}`

[PATCH] md: RAID6: clean up CPUID and FPU enter/exit code - Use kernel_fpu_begin() and kernel_fpu_end() - Use boot_cpu_has() for feature testing even in userspace Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2007-03-01 07:11:25 +03:00			`kernel_fpu_end();`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`}`

			`const struct raid6_calls raid6_mmxx2 = {`
			`raid6_mmx2_gen_syndrome,`
md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and end. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and end. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still need calulation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give an rough estimate how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. To be not to enthusiatic about the expected speeds we will run a worse case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen <stockhausen@collogia.de> Signed-off-by: NeilBrown <neilb@suse.de> 2014-12-15 04:57:04 +03:00			`NULL, /* XOR not yet implemented */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`raid6_have_mmx,`
			`"mmxx2",`
			`0`
			`};`

			`#endif`