From 0078778a86b14f85bf50e96d9ddeb3b70b55805d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Wed, 9 May 2018 12:20:18 +1000
Subject: [PATCH 1/5] powerpc/mm/radix: implement LPID based TLB flushes to be used by KVM

Implement a local TLB flush for invalidating an LPID, with variants for
process or partition scope, and a global TLB flush for invalidating a
partition scoped page of an LPID.

These will be used by KVM in subsequent patches.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 .../include/asm/book3s/64/tlbflush-radix.h |   7 +
 arch/powerpc/mm/tlb-radix.c                | 207 ++++++++++++++++++
 2 files changed, 214 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 19b45ba6caf9..ef5c3f2994c9 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -51,4 +51,11 @@ extern void radix__flush_tlb_all(void);
 extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 					unsigned long address);
 
+extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+				       unsigned long addr,
+				       unsigned long page_size);
+extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__local_flush_tlb_lpid(unsigned int lpid);
+extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+
 #endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a5d7309c2d05..5ac3206c51cc 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -118,6 +118,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbiel_lpid(unsigned long lpid, int set,
+				 unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+				       unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+
 static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 			       unsigned long ap, unsigned long ric)
 {
@@ -150,6 +197,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+				   unsigned long ap, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
 static inline void fixup_tlbie(void)
 {
 	unsigned long pid = 0;
@@ -161,6 +224,16 @@ static inline void fixup_tlbie(void)
 	}
 }
 
+static inline void fixup_tlbie_lpid(unsigned long lpid)
+{
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}
+}
+
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
@@ -214,6 +287,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
+		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_lpid
+	 * must be a compile-time constraint to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+	}
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid_guest(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
+		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+}
+
+
 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
 				     unsigned long pid, unsigned long page_size,
 				     unsigned long psize)
@@ -268,6 +421,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+				  unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbie_lpid_va(va, lpid, ap, ric);
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 				   unsigned long pid, unsigned long page_size,
 				   unsigned long psize, bool also_pwc)
@@ -534,6 +698,49 @@ static int radix_get_mmu_psize(int page_size)
 	return psize;
 }
 
+/*
+ * Flush partition scoped LPID address translation for all CPUs.
+ */
+void radix__flush_tlb_lpid_page(unsigned int lpid,
+				unsigned long addr,
+				unsigned long page_size)
+{
+	int psize = radix_get_mmu_psize(page_size);
+
+	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
+
+/*
+ * Flush partition scoped PWC from LPID for all CPUs.
+ */
+void radix__flush_pwc_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
+}
+EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
+void radix__local_flush_tlb_lpid(unsigned int lpid)
+{
+	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+
+/*
+ * Flush process scoped translations from LPID (=LPIDR).
+ * Important difference: the guest normally manages its own translations,
+ * but some cases, e.g. vCPU migration between CPUs, require KVM to flush.
+ */
+void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+{
+	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
+
+
 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long end, int psize);

From 9f9eae5ce717f497812dfc1bda5219bc589b455d Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre
Date: Wed, 28 Mar 2018 21:58:11 +0200
Subject: [PATCH 2/5] powerpc/kvm: Prefer fault_in_pages_readable function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Directly use fault_in_pages_readable instead of manual __get_user code.
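
fault_in_pages_readable() returns 0 when the page can be faulted in and
non-zero (-EFAULT) when it cannot, so it keeps the same test polarity as
the __get_user() it replaces. A minimal sketch of the pattern, for
illustration only ("addr" is a placeholder, not code from this patch):

	/* Probe one word for readability before relying on the mapping. */
	if (fault_in_pages_readable((const char *)addr, sizeof(u32))) {
		/* non-zero return: the page could not be faulted in */
		return;
	}
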
Fix warning treated as error with W=1:

  arch/powerpc/kernel/kvm.c:675:6: error: variable ‘tmp’ set but not used [-Werror=unused-but-set-variable]

Suggested-by: Christophe Leroy
Signed-off-by: Mathieu Malaterre
Reviewed-by: Christophe Leroy
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 9ad37f827a97..683b5b3805bd 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <linux/pagemap.h>
 #include
 #include
@@ -672,14 +673,13 @@ static void kvm_use_magic_page(void)
 {
 	u32 *p;
 	u32 *start, *end;
-	u32 tmp;
 	u32 features;
 
 	/* Tell the host to map the magic page to -4096 on all CPUs */
 	on_each_cpu(kvm_map_magic_page, &features, 1);
 
 	/* Quick self-test to see if the mapping works */
-	if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+	if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
 		kvm_patching_worked = false;
 		return;
 	}

From d1c7211281c5e1799f00b2228157530e0f7a671c Mon Sep 17 00:00:00 2001
From: Simon Guo
Date: Wed, 23 May 2018 15:01:44 +0800
Subject: [PATCH 3/5] powerpc: Export msr_check_and_set() to modules

PR KVM will need to reuse msr_check_and_set(). This patch exports this
API for reuse.

Signed-off-by: Simon Guo
Reviewed-by: Paul Mackerras
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1237f13fed51..25db000fa5b3 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -154,6 +154,7 @@ unsigned long msr_check_and_set(unsigned long bits)
 
 	return newmsr;
 }
+EXPORT_SYMBOL_GPL(msr_check_and_set);
 
 void __msr_check_and_clear(unsigned long bits)
 {

From ab3759b5734544dd1430527c3d89730990cfa4bb Mon Sep 17 00:00:00 2001
From: Simon Guo
Date: Wed, 23 May 2018 15:01:45 +0800
Subject: [PATCH 4/5] powerpc/reg: Add TEXASR related macros

This patch adds some macros for CR0/TEXASR bits so that PR KVM TM
logic (tbegin./treclaim./tabort.) can make use of them later.
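
For illustration, a sketch of how these macros can be used (assumed
usage, not part of this patch; "cr" is taken to hold a copy of the
condition register after tbegin., and handle_tm_failure() is a
hypothetical helper):

	/* CR0 of 0b0010 after tbegin. means the transaction failed to start. */
	if ((cr & ((unsigned long)CR0_MASK << CR0_SHIFT)) == CR0_TBEGIN_FAILURE)
		failed = true;

	/* TEXASR_FS is set once a transaction failure has been recorded. */
	if (mfspr(SPRN_TEXASR) & TEXASR_FS)
		handle_tm_failure();
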
Signed-off-by: Simon Guo
Reviewed-by: Paul Mackerras
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/reg.h              | 32 +++++++++++++++++----
 arch/powerpc/platforms/powernv/copy-paste.h |  3 +-
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index cb0f272ce123..75c5b2cd9d66 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -146,6 +146,12 @@
 #define MSR_64BIT	0
 #endif
 
+/* Condition Register related */
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+#define CR0_TBEGIN_FAILURE	(0x2 << 28) /* 0b0010 */
+
+
 /* Power Management - Processor Stop Status and Control Register Fields */
 #define PSSCR_RL_MASK		0x0000000F /* Requested Level */
 #define PSSCR_MTL_MASK		0x000000F0 /* Maximum Transition Level */
@@ -239,13 +245,27 @@
 #define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr */
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   '' Upper 32 */
-#define TEXASR_ABORT	__MASK(63-31) /* terminated by tabort or treclaim */
-#define TEXASR_SUSP	__MASK(63-32) /* tx failed in suspended state */
-#define TEXASR_HV	__MASK(63-34) /* MSR[HV] when failure occurred */
-#define TEXASR_PR	__MASK(63-35) /* MSR[PR] when failure occurred */
-#define TEXASR_FS	__MASK(63-36) /* TEXASR Failure Summary */
-#define TEXASR_EXACT	__MASK(63-37) /* TFIAR value is exact */
+
+#define TEXASR_FC_LG	(63 - 7)	/* Failure Code */
+#define TEXASR_AB_LG	(63 - 31)	/* Abort */
+#define TEXASR_SU_LG	(63 - 32)	/* Suspend */
+#define TEXASR_HV_LG	(63 - 34)	/* Hypervisor state */
+#define TEXASR_PR_LG	(63 - 35)	/* Privilege level */
+#define TEXASR_FS_LG	(63 - 36)	/* failure summary */
+#define TEXASR_EX_LG	(63 - 37)	/* TFIAR exact bit */
+#define TEXASR_ROT_LG	(63 - 38)	/* ROT bit */
+
+#define TEXASR_ABORT	__MASK(TEXASR_AB_LG) /* terminated by tabort or treclaim */
+#define TEXASR_SUSP	__MASK(TEXASR_SU_LG) /* tx failed in suspended state */
+#define TEXASR_HV	__MASK(TEXASR_HV_LG) /* MSR[HV] when failure occurred */
+#define TEXASR_PR	__MASK(TEXASR_PR_LG) /* MSR[PR] when failure occurred */
+#define TEXASR_FS	__MASK(TEXASR_FS_LG) /* TEXASR Failure Summary */
+#define TEXASR_EXACT	__MASK(TEXASR_EX_LG) /* TFIAR value is exact */
+#define TEXASR_ROT	__MASK(TEXASR_ROT_LG)
+#define TEXASR_FC	(ASM_CONST(0xFF) << TEXASR_FC_LG)
+
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
+
 #define SPRN_TIDR	144	/* Thread ID register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
index c9a503623431..3fa62de96d9c 100644
--- a/arch/powerpc/platforms/powernv/copy-paste.h
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -7,9 +7,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include
+#include <asm/reg.h>
 
-#define CR0_SHIFT	28
-#define CR0_MASK	0xF
 /*
  * Copy/paste instructions:
  *

From eacbb218fbbab5923775059f7232a9622dc47b2a Mon Sep 17 00:00:00 2001
From: Simon Guo
Date: Wed, 23 May 2018 15:01:46 +0800
Subject: [PATCH 5/5] powerpc: Export tm_enable()/tm_disable/tm_abort() APIs

This patch exports the tm_enable()/tm_disable()/tm_abort() APIs, which
will be used for PR KVM transactional memory logic.
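
As an illustration of the intended call pattern (hypothetical caller,
not part of this patch), a module would bracket its TM manipulation
with these helpers:

	tm_enable();			/* set MSR[TM] so TM instructions can run */
	tm_abort(TM_CAUSE_KVM_RESCHED);	/* abort any transaction in flight */
	/* ... save or rewrite TM state here ... */
	tm_disable();			/* clear MSR[TM] again */
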
Signed-off-by: Simon Guo
Reviewed-by: Paul Mackerras
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/asm-prototypes.h |  3 +++
 arch/powerpc/include/asm/tm.h             |  2 --
 arch/powerpc/kernel/tm.S                  | 12 ++++++++++++
 arch/powerpc/mm/hash_utils_64.c           |  1 +
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d9713ad62e3c..dfdcb2374c28 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,4 +141,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 void pnv_power9_force_smt4_catch(void);
 void pnv_power9_force_smt4_release(void);
 
+void tm_enable(void);
+void tm_disable(void);
+void tm_abort(uint8_t cause);
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index b1658c97047c..e94f6db5e367 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -10,12 +10,10 @@
 
 #ifndef __ASSEMBLY__
 
-extern void tm_enable(void);
 extern void tm_reclaim(struct thread_struct *thread, uint8_t cause);
 extern void tm_reclaim_current(uint8_t cause);
 extern void tm_recheckpoint(struct thread_struct *thread);
-extern void tm_abort(uint8_t cause);
 extern void tm_save_sprs(struct thread_struct *thread);
 extern void tm_restore_sprs(struct thread_struct *thread);
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index b92ac8e711db..ff12f47a96b6 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <asm/export.h>
 
 #ifdef CONFIG_VSX
 /* See fpu.S, this is borrowed from there */
@@ -55,6 +56,16 @@ _GLOBAL(tm_enable)
 	or	r4, r4, r3
 	mtmsrd	r4
1:	blr
+EXPORT_SYMBOL_GPL(tm_enable);
+
+_GLOBAL(tm_disable)
+	mfmsr	r4
+	li	r3, MSR_TM >> 32
+	sldi	r3, r3, 32
+	andc	r4, r4, r3
+	mtmsrd	r4
+	blr
+EXPORT_SYMBOL_GPL(tm_disable);
 
 _GLOBAL(tm_save_sprs)
 	mfspr	r0, SPRN_TFHAR
@@ -78,6 +89,7 @@ _GLOBAL(tm_restore_sprs)
 _GLOBAL(tm_abort)
 	TABORT(R3)
 	blr
+EXPORT_SYMBOL_GPL(tm_abort);
 
 /* void tm_reclaim(struct thread_struct *thread,
  *		   uint8_t cause)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0bd3790d35df..1bd8b4c1aab8 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -64,6 +64,7 @@
 #include
 #include
 #include
+#include <asm/asm-prototypes.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
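
As a closing illustration, a hedged sketch of how a hypervisor might
combine the patch 1 interfaces after changing a guest's partition-scoped
page table (hypothetical caller; "lpid" and "gpa" are assumed inputs,
not code from these patches):

	/* Invalidate the stale translation for one guest page on all CPUs. */
	radix__flush_tlb_lpid_page(lpid, gpa, PAGE_SIZE);

	/* After freeing page-table pages, drop cached page-walk entries too. */
	radix__flush_pwc_lpid(lpid);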