diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fc23040d5a26..f770805f1215 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -17,15 +17,6 @@ .text -#ifdef CONFIG_PPC64 -#define IN_SYNC twi 0,r5,0; isync -#define EIEIO_32 -#define SYNC_64 sync -#else /* CONFIG_PPC32 */ -#define IN_SYNC -#define EIEIO_32 eieio -#define SYNC_64 -#endif /* * Returns (address we are running at) - (address we were linked at) * for use before the text and data are mapped to KERNELBASE. @@ -70,6 +61,7 @@ _GLOBAL(add_reloc_offset) * The *_ns versions don't do byte-swapping. */ _GLOBAL(_insb) + sync cmpwi 0,r5,0 mtctr r5 subi r4,r4,1 @@ -78,7 +70,8 @@ _GLOBAL(_insb) eieio stbu r5,1(r4) bdnz 00b - IN_SYNC + twi 0,r5,0 + isync blr _GLOBAL(_outsb) @@ -86,14 +79,15 @@ _GLOBAL(_outsb) mtctr r5 subi r4,r4,1 blelr- + sync 00: lbzu r5,1(r4) stb r5,0(r3) - EIEIO_32 bdnz 00b - SYNC_64 + sync blr _GLOBAL(_insw) + sync cmpwi 0,r5,0 mtctr r5 subi r4,r4,2 @@ -102,7 +96,8 @@ _GLOBAL(_insw) eieio sthu r5,2(r4) bdnz 00b - IN_SYNC + twi 0,r5,0 + isync blr _GLOBAL(_outsw) @@ -110,14 +105,15 @@ _GLOBAL(_outsw) mtctr r5 subi r4,r4,2 blelr- + sync 00: lhzu r5,2(r4) - EIEIO_32 sthbrx r5,0,r3 bdnz 00b - SYNC_64 + sync blr _GLOBAL(_insl) + sync cmpwi 0,r5,0 mtctr r5 subi r4,r4,4 @@ -126,7 +122,8 @@ _GLOBAL(_insl) eieio stwu r5,4(r4) bdnz 00b - IN_SYNC + twi 0,r5,0 + isync blr _GLOBAL(_outsl) @@ -134,17 +131,18 @@ _GLOBAL(_outsl) mtctr r5 subi r4,r4,4 blelr- + sync 00: lwzu r5,4(r4) stwbrx r5,0,r3 - EIEIO_32 bdnz 00b - SYNC_64 + sync blr #ifdef CONFIG_PPC32 _GLOBAL(__ide_mm_insw) #endif _GLOBAL(_insw_ns) + sync cmpwi 0,r5,0 mtctr r5 subi r4,r4,2 @@ -153,7 +151,8 @@ _GLOBAL(_insw_ns) eieio sthu r5,2(r4) bdnz 00b - IN_SYNC + twi 0,r5,0 + isync blr #ifdef CONFIG_PPC32 @@ -164,17 +163,18 @@ _GLOBAL(_outsw_ns) mtctr r5 subi r4,r4,2 blelr- + sync 00: lhzu r5,2(r4) sth r5,0(r3) - EIEIO_32 bdnz 00b - SYNC_64 + sync blr #ifdef CONFIG_PPC32 _GLOBAL(__ide_mm_insl) #endif _GLOBAL(_insl_ns) + sync cmpwi 0,r5,0 mtctr r5 subi r4,r4,4 @@ -183,7 +183,8 @@ _GLOBAL(_insl_ns) eieio stwu r5,4(r4) bdnz 00b - IN_SYNC + twi 0,r5,0 + isync blr #ifdef CONFIG_PPC32 @@ -194,10 +195,10 @@ _GLOBAL(_outsl_ns) mtctr r5 subi r4,r4,4 blelr- + sync 00: lwzu r5,4(r4) stw r5,0(r3) - EIEIO_32 bdnz 00b - SYNC_64 + sync blr diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index 4df3e80118f4..6a784396660b 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -205,6 +205,7 @@ static inline void eeh_memset_io(volatile void __iomem *addr, int c, lc |= lc << 8; lc |= lc << 16; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && !EEH_CHECK_ALIGN(p, 4)) { *((volatile u8 *)p) = c; p++; @@ -229,6 +230,7 @@ static inline void eeh_memcpy_fromio(void *dest, const volatile void __iomem *sr void *destsave = dest; unsigned long nsave = n; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && (!EEH_CHECK_ALIGN(vsrc, 4) || !EEH_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); __asm__ __volatile__ ("eieio" : : : "memory"); @@ -266,6 +268,7 @@ static inline void eeh_memcpy_toio(volatile void __iomem *dest, const void *src, { void *vdest = (void __force *) dest; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && (!EEH_CHECK_ALIGN(vdest, 4) || !EEH_CHECK_ALIGN(src, 4))) { *((volatile u8 *)vdest) = *((u8 *)src); src++; diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 36c4c34bf565..212428db0d8b 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -19,6 +19,7 @@ extern int check_legacy_ioport(unsigned long base_port); #include #include #include +#include #ifdef CONFIG_PPC_ISERIES #include #endif @@ -162,7 +163,11 @@ extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); -#define mmiowb() +static inline void mmiowb(void) +{ + __asm__ __volatile__ ("sync" : : : "memory"); + get_paca()->io_sync = 0; +} /* * output pause versions need a delay at least for the @@ -278,22 +283,23 @@ static inline int in_8(const volatile unsigned char __iomem *addr) { int ret; - __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lbz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_8(volatile unsigned char __iomem *addr, int val) { - __asm__ __volatile__("stb%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; stb%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline int in_le16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lhbrx %0,0,%1; twi 0,%0,0; isync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -302,28 +308,30 @@ static inline int in_be16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lhz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_le16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sthbrx %1,0,%2; sync" + __asm__ __volatile__("sync; sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); + get_paca()->io_sync = 1; } static inline void out_be16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sth%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline unsigned in_le32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lwbrx %0,0,%1; twi 0,%0,0; isync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -332,21 +340,23 @@ static inline unsigned in_be32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lwz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_le32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr) + __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); + get_paca()->io_sync = 1; } static inline void out_be32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stw%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) @@ -354,6 +364,7 @@ static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) unsigned long tmp, ret; __asm__ __volatile__( + "sync\n" "ld %1,0(%2)\n" "twi 0,%1,0\n" "isync\n" @@ -372,7 +383,7 @@ static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) { unsigned long ret; - __asm__ __volatile__("ld%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; ld%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } @@ -389,14 +400,16 @@ static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long "rldicl %1,%1,32,0\n" "rlwimi %0,%1,8,8,31\n" "rlwimi %0,%1,24,16,23\n" - "std %0,0(%3)\n" - "sync" + "sync\n" + "std %0,0(%3)" : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr)); + get_paca()->io_sync = 1; } static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val) { - __asm__ __volatile__("std%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } #ifndef CONFIG_PPC_ISERIES diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h index 2d4585f06209..3d5d590bc4b0 100644 --- a/include/asm-powerpc/paca.h +++ b/include/asm-powerpc/paca.h @@ -93,6 +93,7 @@ struct paca_struct { u64 saved_r1; /* r1 save for RTAS calls */ u64 saved_msr; /* MSR saved here by enter_rtas */ u8 proc_enabled; /* irq soft-enable flag */ + u8 io_sync; /* writel() needs spin_unlock sync */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/include/asm-powerpc/spinlock.h b/include/asm-powerpc/spinlock.h index 895cb6d3a42a..c31e4382a775 100644 --- a/include/asm-powerpc/spinlock.h +++ b/include/asm-powerpc/spinlock.h @@ -36,6 +36,19 @@ #define LOCK_TOKEN 1 #endif +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +#define CLEAR_IO_SYNC (get_paca()->io_sync = 0) +#define SYNC_IO do { \ + if (unlikely(get_paca()->io_sync)) { \ + mb(); \ + get_paca()->io_sync = 0; \ + } \ + } while (0) +#else +#define CLEAR_IO_SYNC +#define SYNC_IO +#endif + /* * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. @@ -61,6 +74,7 @@ static __inline__ unsigned long __spin_trylock(raw_spinlock_t *lock) static int __inline__ __raw_spin_trylock(raw_spinlock_t *lock) { + CLEAR_IO_SYNC; return __spin_trylock(lock) == 0; } @@ -91,6 +105,7 @@ extern void __rw_yield(raw_rwlock_t *lock); static void __inline__ __raw_spin_lock(raw_spinlock_t *lock) { + CLEAR_IO_SYNC; while (1) { if (likely(__spin_trylock(lock) == 0)) break; @@ -107,6 +122,7 @@ static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long { unsigned long flags_dis; + CLEAR_IO_SYNC; while (1) { if (likely(__spin_trylock(lock) == 0)) break; @@ -124,6 +140,7 @@ static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long static __inline__ void __raw_spin_unlock(raw_spinlock_t *lock) { + SYNC_IO; __asm__ __volatile__("# __raw_spin_unlock\n\t" LWSYNC_ON_SMP: : :"memory"); lock->slock = 0; diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 89c6f1bc3aab..680555be22ec 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -63,7 +63,7 @@ extern inline int in_8(const volatile unsigned char __iomem *addr) int ret; __asm__ __volatile__( - "lbz%U1%X1 %0,%1;\n" + "sync; lbz%U1%X1 %0,%1;\n" "twi 0,%0,0;\n" "isync" : "=r" (ret) : "m" (*addr)); return ret; @@ -78,7 +78,7 @@ extern inline int in_le16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhbrx %0,0,%1;\n" + __asm__ __volatile__("sync; lhbrx %0,0,%1;\n" "twi 0,%0,0;\n" "isync" : "=r" (ret) : "r" (addr), "m" (*addr)); @@ -89,7 +89,7 @@ extern inline int in_be16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhz%U1%X1 %0,%1;\n" + __asm__ __volatile__("sync; lhz%U1%X1 %0,%1;\n" "twi 0,%0,0;\n" "isync" : "=r" (ret) : "m" (*addr)); return ret; @@ -97,20 +97,20 @@ extern inline int in_be16(const volatile unsigned short __iomem *addr) extern inline void out_le16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sthbrx %1,0,%2; eieio" : "=m" (*addr) : + __asm__ __volatile__("sync; sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } extern inline void out_be16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sth%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); } extern inline unsigned in_le32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwbrx %0,0,%1;\n" + __asm__ __volatile__("sync; lwbrx %0,0,%1;\n" "twi 0,%0,0;\n" "isync" : "=r" (ret) : "r" (addr), "m" (*addr)); @@ -121,7 +121,7 @@ extern inline unsigned in_be32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwz%U1%X1 %0,%1;\n" + __asm__ __volatile__("sync; lwz%U1%X1 %0,%1;\n" "twi 0,%0,0;\n" "isync" : "=r" (ret) : "m" (*addr)); return ret; @@ -129,13 +129,13 @@ extern inline unsigned in_be32(const volatile unsigned __iomem *addr) extern inline void out_le32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stwbrx %1,0,%2; eieio" : "=m" (*addr) : + __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } extern inline void out_be32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stw%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); } #if defined (CONFIG_8260_PCI9) #define readb(addr) in_8((volatile u8 *)(addr)) @@ -259,6 +259,7 @@ extern __inline__ unsigned int name(unsigned int port) \ { \ unsigned int x; \ __asm__ __volatile__( \ + "sync\n" \ "0:" op " %0,0,%1\n" \ "1: twi 0,%0,0\n" \ "2: isync\n" \ @@ -284,6 +285,7 @@ extern __inline__ unsigned int name(unsigned int port) \ extern __inline__ void name(unsigned int val, unsigned int port) \ { \ __asm__ __volatile__( \ + "sync\n" \ "0:" op " %0,0,%1\n" \ "1: sync\n" \ "2:\n" \