diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index 01ba1f8e64d1..4c15eb11a917 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -202,4 +202,109 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 +/* + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + + #endif diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h index c05568290add..7da0716fb317 100644 --- a/include/asm-x86/system_32.h +++ b/include/asm-x86/system_32.h @@ -36,105 +36,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc #endif /* __KERNEL__ */ -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - - -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while(0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - #include /* diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h index 560470ea27c8..9def35eb75e3 100644 --- a/include/asm-x86/system_64.h +++ b/include/asm-x86/system_64.h @@ -48,31 +48,6 @@ #endif /* __KERNEL__ */ -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do {} while(0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do {} while(0) -#endif - - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") - -#define read_barrier_depends() do {} while(0) -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) - static inline unsigned long read_cr8(void) { unsigned long cr8;