diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 0abeb0e2d616..37671feb2bf6 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -87,6 +87,9 @@ typedef struct { /* Number of bits in the mm_cpumask */ atomic_t active_cpus; + /* Number of users of the external (Nest) MMU */ + atomic_t copros; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8eea90f80e45..19b45ba6caf9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -47,9 +47,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); -extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size); -extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a2c5c95882cf..2e2bacbdf6ed 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -203,6 +203,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) @@ -465,7 +466,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_PKEY) + CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 051b3d63afe3..3a15b6db9501 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -92,15 +92,23 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm) static inline void mm_context_add_copro(struct mm_struct *mm) { /* - * On hash, should only be called once over the lifetime of - * the context, as we can't decrement the active cpus count - * and flush properly for the time being. + * If any copro is in use, increment the active CPU count + * in order to force TLB invalidations to be global as to + * propagate to the Nest MMU. */ - inc_mm_active_cpus(mm); + if (atomic_inc_return(&mm->context.copros) == 1) + inc_mm_active_cpus(mm); } static inline void mm_context_remove_copro(struct mm_struct *mm) { + int c; + + c = atomic_dec_if_positive(&mm->context.copros); + + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + /* * Need to broadcast a global flush of the full mm before * decrementing active_cpus count, as the next TLBI may be @@ -111,7 +119,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * for the time being. Invalidations will remain global if * used on hash. */ - if (radix_enabled()) { + if (c == 0 && radix_enabled()) { flush_all_mm(mm); dec_mm_active_cpus(mm); } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 945e2c29ad2d..8ca5d5b74618 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -709,6 +709,9 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + + if ((version & 0xffff0000) == 0x004e0000) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; } static void __init cpufeatures_setup_finished(void) @@ -720,6 +723,9 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; } + /* Make sure powerpc_base_platform is non-NULL */ + powerpc_base_platform = cur_cpu_spec->platform; + system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3ac87e53b3da..1ecfd8ffb098 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -706,7 +706,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) ld r3, PACA_EXSLB+EX_DAR(r13) std r3, _DAR(r1) beq cr6, 2f - li r10, 0x480 /* fix trap number for I-SLB miss */ + li r10, 0x481 /* fix trap number for I-SLB miss */ std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f88038847790..061aa0f47bb1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -476,6 +476,14 @@ void force_external_irq_replay(void) */ WARN_ON(!arch_irqs_disabled()); + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* Indicate in the PACA that we have an interrupt to replay */ local_paca->irq_happened |= PACA_IRQ_EE; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5cb4e4687107..5d9bafe9a371 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8888e625a999..e1c083fbe434 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], kvm->arch.lpid, 0, 0, 0); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a0675e91ad7d..656933c85925 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -278,6 +287,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -771,7 +781,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -843,6 +853,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 929d9ef7083f..3f980baade4c 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -173,6 +173,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm_iommu_init(mm); #endif atomic_set(&mm->context.active_cpus, 0); + atomic_set(&mm->context.copros, 0); return 0; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 28c980eb4422..adf469f312f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -481,6 +481,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71d1b19ad1c0..a07f5372a4bf 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -119,6 +119,49 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -151,26 +194,27 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { asm volatile("ptesync": : :"memory"); - __tlbie_pid(pid, ric); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + } + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 1, rb, rs, ric, prs, r); -} - static inline void __tlbiel_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -203,22 +247,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, asm volatile("ptesync": : :"memory"); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static inline void __tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -237,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -248,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbie_pid(pid, RIC_FLUSH_PWC); __tlbie_va_range(start, end, pid, page_size, psize); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -311,6 +341,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_needs_flush_escalation(struct mm_struct *mm) +{ + /* + * P9 nest MMU has issues with the page walk cache + * caching PTEs and not flushing them properly when + * RIC = 0 for a PID/LPID invalidate + */ + return atomic_read(&mm->context.copros) != 0; +} + #ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { @@ -321,9 +361,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm) return; preempt_disable(); - if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_TLB); - else + if (!mm_is_thread_local(mm)) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } @@ -435,10 +478,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } if (full) { - if (local) + if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } } else { bool hflush = false; unsigned long hstart, hend; @@ -465,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (hflush) __tlbie_va_range(hstart, hend, pid, HPAGE_PMD_SIZE, MMU_PAGE_2M); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } @@ -548,6 +596,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, } if (full) { + if (!local && mm_needs_flush_escalation(mm)) + also_pwc = true; + if (local) _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); else @@ -603,46 +654,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size) -{ - unsigned long rb,rs,prs,r; - unsigned long ap; - unsigned long ric = RIC_FLUSH_TLB; - - ap = mmu_get_ap(radix_get_mmu_psize(page_size)); - rb = gpa & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid_va); - -void radix__flush_tlb_lpid(unsigned long lpid) -{ - unsigned long rb,rs,prs,r; - unsigned long ric = RIC_FLUSH_ALL; - - rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* partition scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid); - void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) {