From 12564485ed8caac3c18572793ec01330792c7191 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:56 -0500 Subject: [PATCH 01/18] Revert "powerpc/64s: Remove PROT_SAO support" This reverts commit 5c9fa16e8abd342ce04dc830c1ebb2a03abf6c05. Since PROT_SAO can still be useful for certain classes of software, reintroduce it. Concerns about guest migration for LPARs using SAO will be addressed next. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-2-shawn@anastas.io --- arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++-- arch/powerpc/include/asm/cputable.h | 10 ++--- arch/powerpc/include/asm/mman.h | 26 ++++++++++-- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 + arch/powerpc/include/uapi/asm/mman.h | 2 +- arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 + include/linux/mm.h | 2 + include/trace/events/mmflags.h | 2 + mm/ksm.c | 4 ++ tools/testing/selftests/powerpc/mm/.gitignore | 1 + tools/testing/selftests/powerpc/mm/Makefile | 4 +- tools/testing/selftests/powerpc/mm/prot_sao.c | 42 +++++++++++++++++++ 13 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 tools/testing/selftests/powerpc/mm/prot_sao.c diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6de56c3b33c4..495fc0ccb453 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -20,13 +20,9 @@ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ - -#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */ - /* No bits set is normal cacheable memory */ - /* 0x00010 unused, is SAO bit on radix POWER9 */ +#define _PAGE_SAO 0x00010 /* Strong access order */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ - #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ /* @@ -828,6 +824,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, return hash__set_pte_at(mm, addr, ptep, pte, percpu); } +#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) { diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index e005b4581023..32a15dc49e8c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -196,7 +196,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) -// Free LONG_ASM_CONST(0x0000000008000000) +#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) @@ -441,7 +441,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \ + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX ) @@ -450,7 +450,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ @@ -461,7 +461,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ @@ -479,7 +479,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 7c07728af300..4ba303ea27f5 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,20 +13,38 @@ #include #include -#ifdef CONFIG_PPC_MEM_KEYS static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { - return pkey_to_vmflag_bits(pkey); +#ifdef CONFIG_PPC_MEM_KEYS + return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); +#else + return ((prot & PROT_SAO) ? VM_SAO : 0); +#endif } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { - return __pgprot(vmflag_to_pte_pkey_bits(vm_flags)); +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else + return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) -#endif + +static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) +{ + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) + return false; + if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) + return false; + return true; +} +#define arch_validate_prot arch_validate_prot #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 59ee9fa4ae09..6cb8aa357191 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -82,6 +82,8 @@ */ #include +#define _PAGE_SAO 0 + #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 3a700351feca..c0c737215b00 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -11,7 +11,7 @@ #include -#define PROT_SAO 0x10 /* Unsupported since v5.9 */ +#define PROT_SAO 0x10 /* Strong Access Ordering */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8dc46f38680b..f204ad79b6b5 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -653,7 +653,7 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, {"no-execute", feat_enable, 0}, - /* strong-access-ordering is unused */ + {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, {"cache-inhibited-large-page", feat_enable_large_ci, 0}, {"coprocessor-icswx", feat_enable, 0}, {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 890a71c5293e..c663e7ba801f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -232,6 +232,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_I; else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); else /* * Add memory coherence if cache inhibited is not set diff --git a/include/linux/mm.h b/include/linux/mm.h index 1983e08f5906..5abe6df4247e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -321,6 +321,8 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ +#elif defined(CONFIG_PPC) +# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 939092dbcb8b..5fb752034386 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -114,6 +114,8 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } +#elif defined(CONFIG_PPC) +#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) || defined(CONFIG_IA64) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) diff --git a/mm/ksm.c b/mm/ksm.c index 0aa2247bddd7..90a625b02a1d 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2453,6 +2453,10 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, if (vma_is_dax(vma)) return 0; +#ifdef VM_SAO + if (*vm_flags & VM_SAO) + return 0; +#endif #ifdef VM_SPARC_ADI if (*vm_flags & VM_SPARC_ADI) return 0; diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 91c775c23c66..aac4a59f9e28 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -2,6 +2,7 @@ hugetlb_vs_thp_test subpage_prot tempfile +prot_sao segv_errors wild_bctr large_vm_fork_separation diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 250ce172e0da..defe488d6bf1 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \ +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst @@ -14,6 +14,8 @@ include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c +$(OUTPUT)/prot_sao: ../utils.c + $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c new file mode 100644 index 000000000000..e2eed65b7735 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2016, Michael Ellerman, IBM Corp. + */ + +#include +#include +#include +#include + +#include + +#include "utils.h" + +#define SIZE (64 * 1024) + +int test_prot_sao(void) +{ + char *p; + + /* 2.06 or later should support SAO */ + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + /* + * Ensure we can ask for PROT_SAO. + * We can't really verify that it does the right thing, but at least we + * confirm the kernel will accept it. + */ + p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); + + /* Write to the mapping, to at least cause a fault */ + memset(p, 0xaa, SIZE); + + return 0; +} + +int main(void) +{ + return test_harness(test_prot_sao, "prot-sao"); +} From 9b725a90a8f127802e19466d4e336e701bcea0d2 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:57 -0500 Subject: [PATCH 02/18] powerpc/64s: Disallow PROT_SAO in LPARs by default Since migration of guests using SAO to ISA 3.1 hosts may cause issues, disable PROT_SAO in LPARs by default and introduce a new Kconfig option PPC_PROT_SAO_LPAR to allow users to enable it if desired. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-3-shawn@anastas.io --- arch/powerpc/Kconfig | 12 ++++++++++++ arch/powerpc/include/asm/mman.h | 9 +++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..65bed1fdeaad 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -860,6 +860,18 @@ config PPC_SUBPAGE_PROT If unsure, say N here. +config PPC_PROT_SAO_LPAR + bool "Support PROT_SAO mappings in LPARs" + depends on PPC_BOOK3S_64 + help + This option adds support for PROT_SAO mappings from userspace + inside LPARs on supported CPUs. + + This may cause issues when performing guest migration from + a CPU that supports SAO to one that does not. + + If unsure, say N here. + config PPC_COPRO_BASE bool diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 4ba303ea27f5..7cb6d18f5cd6 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -40,8 +40,13 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) return false; - if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return false; + if (prot & PROT_SAO) { + if (!cpu_has_feature(CPU_FTR_SAO)) + return false; + if (firmware_has_feature(FW_FEATURE_LPAR) && + !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR)) + return false; + } return true; } #define arch_validate_prot arch_validate_prot From 24ded46f53f954b9cf246c5d4e3770c7a8aa84ce Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:58 -0500 Subject: [PATCH 03/18] selftests/powerpc: Update PROT_SAO test to skip ISA 3.1 Since SAO support was removed from ISA 3.1, skip the prot_sao test if PPC_FEATURE2_ARCH_3_1 is set. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-4-shawn@anastas.io --- tools/testing/selftests/powerpc/mm/prot_sao.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c index e2eed65b7735..e0cf8ebbf8cd 100644 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -18,8 +18,9 @@ int test_prot_sao(void) { char *p; - /* 2.06 or later should support SAO */ - SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + /* SAO was introduced in 2.06 and removed in 3.1 */ + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) || + have_hwcap2(PPC_FEATURE2_ARCH_3_1)); /* * Ensure we can ask for PROT_SAO. From 4d618b9f3fcab84e9ec28c180de46fb2c929d096 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 21 Aug 2020 20:49:10 +1000 Subject: [PATCH 04/18] video: fbdev: controlfb: Fix build for COMPILE_TEST=y && PPC_PMAC=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build is currently broken, if COMPILE_TEST=y and PPC_PMAC=n: linux/drivers/video/fbdev/controlfb.c: In function ‘control_set_hardware’: linux/drivers/video/fbdev/controlfb.c:276:2: error: implicit declaration of function ‘btext_update_display’ 276 | btext_update_display(p->frame_buffer_phys + CTRLFB_OFF, | ^~~~~~~~~~~~~~~~~~~~ Fix it by including btext.h whenever CONFIG_BOOTX_TEXT is enabled. Fixes: a07a63b0e24d ("video: fbdev: controlfb: add COMPILE_TEST support") Signed-off-by: Michael Ellerman Acked-by: Bartlomiej Zolnierkiewicz Link: https://lore.kernel.org/r/20200821104910.3363818-1-mpe@ellerman.id.au --- drivers/video/fbdev/controlfb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 9c4f1be856ec..547abeb39f87 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -49,6 +49,8 @@ #include #ifdef CONFIG_PPC_PMAC #include +#endif +#ifdef CONFIG_BOOTX_TEXT #include #endif From aa661d7fab436d8a782618b3138da1a84ca28a31 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Aug 2020 17:31:16 -0700 Subject: [PATCH 05/18] Documentation/powerpc: fix malformed table in syscall64-abi Fix malformed table warning in powerpc/syscall64-abi.rst by making two tables and moving the headings. Documentation/powerpc/syscall64-abi.rst:53: WARNING: Malformed table. Text in column margin in table line 2. =========== ============= ======================================== --- For the sc instruction, differences with the ELF ABI --- r0 Volatile (System call number.) r3 Volatile (Parameter 1, and return value.) r4-r8 Volatile (Parameters 2-6.) cr0 Volatile (cr0.SO is the return error condition.) cr1, cr5-7 Nonvolatile lr Nonvolatile --- For the scv 0 instruction, differences with the ELF ABI --- r0 Volatile (System call number.) r3 Volatile (Parameter 1, and return value.) r4-r8 Volatile (Parameters 2-6.) =========== ============= ======================================== Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e06de4d3-a36f-2745-9775-467e125436cc@infradead.org --- Documentation/powerpc/syscall64-abi.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst index 46caaadbb029..379817ca64d2 100644 --- a/Documentation/powerpc/syscall64-abi.rst +++ b/Documentation/powerpc/syscall64-abi.rst @@ -49,16 +49,18 @@ Register preservation rules Register preservation rules match the ELF ABI calling sequence with the following differences: -=========== ============= ======================================== --- For the sc instruction, differences with the ELF ABI --- +=========== ============= ======================================== r0 Volatile (System call number.) r3 Volatile (Parameter 1, and return value.) r4-r8 Volatile (Parameters 2-6.) cr0 Volatile (cr0.SO is the return error condition.) cr1, cr5-7 Nonvolatile lr Nonvolatile +=========== ============= ======================================== --- For the scv 0 instruction, differences with the ELF ABI --- +=========== ============= ======================================== r0 Volatile (System call number.) r3 Volatile (Parameter 1, and return value.) r4-r8 Volatile (Parameters 2-6.) From e5fe56092e753c50093c60e757561984abff335e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:53:09 +1000 Subject: [PATCH 06/18] powerpc/64s: scv entry should set PPR Kernel entry sets PPR to HMT_MEDIUM by convention. The scv entry path missed this. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075309.224184-1-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 33a42e42c56f..733e40eba4eb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -113,6 +113,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + /* * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which * would clobber syscall parameters. Also we always enter with IRQs From b91eb5182405b01a8aeb42e9b5207831767e97ee Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Aug 2020 19:34:24 +1000 Subject: [PATCH 07/18] powerpc/64s: Fix crash in load_fp_state() due to fpexc_mode The recent commit 01eb01877f33 ("powerpc/64s: Fix restore_math unnecessarily changing MSR") changed some of the handling of floating point/vector restore. In particular it caused current->thread.fpexc_mode to be copied into the current MSR (via msr_check_and_set()), rather than just into regs->msr (which is moved into MSR on return to userspace). This can lead to a crash in the kernel if we take a floating point exception when restoring FPSCR: Oops: Exception in kernel mode, sig: 8 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 3 PID: 101213 Comm: ld64.so.2 Not tainted 5.9.0-rc1-00098-g18445bf405cb-dirty #9 NIP: c00000000000fbb4 LR: c00000000001a7ac CTR: c000000000183570 REGS: c0000016b7cfb3b0 TRAP: 0700 Not tainted (5.9.0-rc1-00098-g18445bf405cb-dirty) MSR: 900000000290b933 CR: 44002444 XER: 00000000 CFAR: c00000000001a7a8 IRQMASK: 1 GPR00: c00000000001ae40 c0000016b7cfb640 c0000000011b7f00 c000001542a0f740 GPR04: c000001542a0f720 c000001542a0eb00 0000000000000900 c000001542a0eb00 GPR08: 000000000000000a 0000000000002000 9000000000009033 0000000000000000 GPR12: 0000000000004000 c0000017ffffd900 0000000000000001 c000000000df5a58 GPR16: c000000000e19c18 c0000000010e1123 0000000000000001 c000000000e1a638 GPR20: 0000000000000000 c0000000044b1d00 0000000000000000 c000001542a0f2a0 GPR24: 00000016c7fe0000 c000001542a0f720 c000000001c93da0 c000000000fe5f28 GPR28: c000001542a0f720 0000000000800000 c0000016b7cfbe90 0000000002802900 NIP load_fp_state+0x4/0x214 LR restore_math+0x17c/0x1f0 Call Trace: 0xc0000016b7cfb680 (unreliable) __switch_to+0x330/0x460 __schedule+0x318/0x920 schedule+0x74/0x140 schedule_timeout+0x318/0x3f0 wait_for_completion+0xc8/0x210 call_usermodehelper_exec+0x234/0x280 do_coredump+0xedc/0x13c0 get_signal+0x1d4/0xbe0 do_notify_resume+0x1a0/0x490 interrupt_exit_user_prepare+0x1c4/0x230 interrupt_return+0x14/0x1c0 Instruction dump: ebe10168 e88101a0 7c8ff120 382101e0 e8010010 7c0803a6 4e800020 790605c4 782905c4 7c0008a8 7c0008a8 c8030200 48000088 c8030000 c8230010 Fix it by only loading the fpexc_mode value into regs->msr. Also add a comment to explain that although VSX is subject to the value of fpexc_mode, we don't have to handle that separately because we only allow VSX to be enabled if FP is also enabled. Fixes: 01eb01877f33 ("powerpc/64s: Fix restore_math unnecessarily changing MSR") Reported-by: Milton Miller Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Link: https://lore.kernel.org/r/20200825093424.3967813-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 016bd831908e..73a57043ee66 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -548,7 +548,7 @@ void notrace restore_math(struct pt_regs *regs) * are live for the user thread). */ if ((!(msr & MSR_FP)) && should_restore_fp()) - new_msr |= MSR_FP | current->thread.fpexc_mode; + new_msr |= MSR_FP; if ((!(msr & MSR_VEC)) && should_restore_altivec()) new_msr |= MSR_VEC; @@ -559,11 +559,17 @@ void notrace restore_math(struct pt_regs *regs) } if (new_msr) { + unsigned long fpexc_mode = 0; + msr_check_and_set(new_msr); - if (new_msr & MSR_FP) + if (new_msr & MSR_FP) { do_restore_fp(); + // This also covers VSX, because VSX implies FP + fpexc_mode = current->thread.fpexc_mode; + } + if (new_msr & MSR_VEC) do_restore_altivec(); @@ -572,7 +578,7 @@ void notrace restore_math(struct pt_regs *regs) msr_check_and_clear(new_msr); - regs->msr |= new_msr; + regs->msr |= new_msr | fpexc_mode; } } #endif From b460b512417ae9c8b51a3bdcc09020cd6c60ff69 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Jun 2020 12:56:12 +1000 Subject: [PATCH 08/18] powerpc/perf: Fix crashes with generic_compat_pmu & BHRB The bhrb_filter_map ("The Branch History Rolling Buffer") callback is only defined in raw CPUs' power_pmu structs. The "architected" CPUs use generic_compat_pmu, which does not have this callback, and crashes occur if a user tries to enable branch stack for an event. This add a NULL pointer check for bhrb_filter_map() which behaves as if the callback returned an error. This does not add the same check for config_bhrb() as the only caller checks for cpuhw->bhrb_users which remains zero if bhrb_filter_map==0. Fixes: be80e758d0c2 ("powerpc/perf: Add generic compat mode pmu driver") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Alexey Kardashevskiy Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200602025612.62707-1-aik@ozlabs.ru --- arch/powerpc/perf/core-book3s.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 93d20e1ed845..08643cba1494 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1557,9 +1557,16 @@ nocheck: ret = 0; out: if (has_branch_stack(event)) { - power_pmu_bhrb_enable(event); - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( - event->attr.branch_sample_type); + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + + if (bhrb_filter != -1) { + cpuhw->bhrb_filter = bhrb_filter; + power_pmu_bhrb_enable(event); + } } perf_pmu_enable(event->pmu); @@ -1881,7 +1888,6 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; - u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1987,7 +1993,10 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - bhrb_filter = ppmu->bhrb_filter_map( + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); if (bhrb_filter == -1) { From 82715a0f332843d3a1830d7ebc9ac7c99a00c880 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 26 Aug 2020 02:40:29 -0400 Subject: [PATCH 09/18] powerpc/perf: Fix reading of MSR[HV/PR] bits in trace-imc IMC trace-mode uses MSR[HV/PR] bits to set the cpumode for the instruction pointer captured in each sample. The bits are fetched from the third double word of the trace record. Reading third double word from IMC trace record should use be64_to_cpu() along with READ_ONCE inorder to fetch correct MSR[HV/PR] bits. Patch addresses this change. Currently we are using PERF_RECORD_MISC_HYPERVISOR as cpumode if MSR HV is 1 and PR is 0 which means the address is from host counter. But using PERF_RECORD_MISC_HYPERVISOR for host counter data will fail to resolve the address -> symbol during "perf report" because perf tools side uses PERF_RECORD_MISC_KERNEL to represent the host counter data. Therefore, fix the trace imc sample data to use PERF_RECORD_MISC_KERNEL as cpumode for host kernel information. Fixes: 77ca3951cc37 ("powerpc/perf: Add kernel support for new MSR[HV PR] bits in trace-imc") Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1598424029-1662-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/imc-pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index a45d694a5d5d..62d0b54086f8 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1289,7 +1289,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->misc = 0; if (cpu_has_feature(CPU_FTR_ARCH_31)) { - switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) { case 0:/* when MSR HV and PR not set in the trace-record */ header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; break; @@ -1297,7 +1297,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->misc |= PERF_RECORD_MISC_GUEST_USER; break; case 2: /* MSR HV is 1 and PR is 0 */ - header->misc |= PERF_RECORD_MISC_HYPERVISOR; + header->misc |= PERF_RECORD_MISC_KERNEL; break; case 3: /* MSR HV is 1 and PR is 1 */ header->misc |= PERF_RECORD_MISC_USER; From 16d83a540ca4e7f1ebb2b3756869b77451d31414 Mon Sep 17 00:00:00 2001 From: Pratik Rajesh Sampat Date: Wed, 26 Aug 2020 13:59:18 +0530 Subject: [PATCH 10/18] Revert "powerpc/powernv/idle: Replace CPU feature check with PVR check" cpuidle stop state implementation has minor optimizations for P10 where hardware preserves more SPR registers compared to P9. The current P9 driver works for P10, although does few extra save-restores. P9 driver can provide the required power management features like SMT thread folding and core level power savings on a P10 platform. Until the P10 stop driver is available, revert the commit which allows for only P9 systems to utilize cpuidle and blocks all idle stop states for P10. CPU idle states are enabled and tested on the P10 platform with this fix. This reverts commit 8747bf36f312356f8a295a0c39ff092d65ce75ae. Fixes: 8747bf36f312 ("powerpc/powernv/idle: Replace CPU feature check with PVR check") Signed-off-by: Pratik Rajesh Sampat Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200826082918.89306-1-psampat@linux.ibm.com --- arch/powerpc/platforms/powernv/idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 77513a80cef9..345ab062b21a 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1223,7 +1223,7 @@ static void __init pnv_probe_idle_states(void) return; } - if (pvr_version_is(PVR_POWER9)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) From 4a133eb351ccc275683ad49305d0b04dde903733 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Aug 2020 18:30:27 +0000 Subject: [PATCH 11/18] powerpc/32s: Disable VMAP stack which CONFIG_ADB_PMU low_sleep_handler() can't restore the context from virtual stack because the stack can hardly be accessed with MMU OFF. For now, disable VMAP stack when CONFIG_ADB_PMU is selected. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Giuseppe Sacco Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec96c15bfa1a7415ab604ee1c98cd45779c08be0.1598553015.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87737ec86d39..1dc9d3c81872 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK + select HAVE_ARCH_VMAP_STACK if !ADB_PMU config PPC_BOOK3S_601 bool "PowerPC 601" From 103a8542cb35b5130f732d00b0419a594ba1b517 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Aug 2020 15:38:52 +0530 Subject: [PATCH 12/18] powerpc/book3s64/radix: Fix boot failure with large amount of guest memory If the hypervisor doesn't support hugepages, the kernel ends up allocating a large number of page table pages. The early page table allocation was wrongly setting the max memblock limit to ppc64_rma_size with radix translation which resulted in boot failure as shown below. Kernel panic - not syncing: early_alloc_pgtable: Failed to allocate 16777216 bytes align=0x1000000 nid=-1 from=0x0000000000000000 max_addr=0xffffffffffffffff CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-24.9-default+ #2 Call Trace: [c0000000016f3d00] [c0000000007c6470] dump_stack+0xc4/0x114 (unreliable) [c0000000016f3d40] [c00000000014c78c] panic+0x164/0x418 [c0000000016f3dd0] [c000000000098890] early_alloc_pgtable+0xe0/0xec [c0000000016f3e60] [c0000000010a5440] radix__early_init_mmu+0x360/0x4b4 [c0000000016f3ef0] [c000000001099bac] early_init_mmu+0x1c/0x3c [c0000000016f3f10] [c00000000109a320] early_setup+0x134/0x170 This was because the kernel was checking for the radix feature before we enable the feature via mmu_features. This resulted in the kernel using hash restrictions on radix. Rework the early init code such that the kernel boot with memblock restrictions as imposed by hash. At that point, the kernel still hasn't finalized the translation the kernel will end up using. We have three different ways of detecting radix. 1. dt_cpu_ftrs_scan -> used only in case of PowerNV 2. ibm,pa-features -> Used when we don't use cpu_dt_ftr_scan 3. CAS -> Where we negotiate with hypervisor about the supported translation. We look at 1 or 2 early in the boot and after that, we look at the CAS vector to finalize the translation the kernel will use. We also support a kernel command line option (disable_radix) to switch to hash. Update the memblock limit after mmu_early_init_devtree() if the kernel is going to use radix translation. This forces some of the memblock allocations we do before mmu_early_init_devtree() to be within the RMA limit. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reported-by: Shirisha Ganta Signed-off-by: Aneesh Kumar K.V Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200828100852.426575-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 10 +++++----- arch/powerpc/mm/book3s64/radix_pgtable.c | 15 --------------- arch/powerpc/mm/init_64.c | 11 +++++++++-- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 55442d45c597..b392384a3b15 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -239,14 +239,14 @@ static inline void early_init_mmu_secondary(void) extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); -extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size); static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - if (early_radix_enabled()) - return radix__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + /* + * Hash has more strict restrictions. At this point we don't + * know which translations we will pick. Hence go with hash + * restrictions. + */ return hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size); } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 28c784976bed..d5f0c10d752a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -734,21 +734,6 @@ void radix__mmu_cleanup_all(void) } } -void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* - * We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* - * Radix mode is not limited by RMA / VRMA addressing. - */ - ppc64_rma_size = ULONG_MAX; -} - #ifdef CONFIG_MEMORY_HOTPLUG static void free_pte_table(pte_t *pte_start, pmd_t *pmd) { diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 02e127fa5777..8459056cce67 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -433,9 +433,16 @@ void __init mmu_early_init_devtree(void) if (!(mfmsr() & MSR_HV)) early_check_vec5(); - if (early_radix_enabled()) + if (early_radix_enabled()) { radix__early_init_devtree(); - else + /* + * We have finalized the translation we are going to use by now. + * Radix mode is not limited by RMA / VRMA addressing. + * Hence don't limit memblock allocations. + */ + ppc64_rma_size = ULONG_MAX; + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + } else hash__early_init_devtree(); } #endif /* CONFIG_PPC_BOOK3S_64 */ From fc1f178cdb31783ff37296ecae817a1045a1a513 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Sep 2020 22:45:17 +1000 Subject: [PATCH 13/18] selftests/powerpc: Skip PROT_SAO test in guests/LPARS In commit 9b725a90a8f1 ("powerpc/64s: Disallow PROT_SAO in LPARs by default") PROT_SAO was disabled in guests/LPARs by default. So skip the test if we are running in a guest to avoid a spurious failure. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200901124653.523182-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/mm/prot_sao.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c index e0cf8ebbf8cd..30b71b1d78d5 100644 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -18,9 +19,13 @@ int test_prot_sao(void) { char *p; - /* SAO was introduced in 2.06 and removed in 3.1 */ + /* + * SAO was introduced in 2.06 and removed in 3.1. It's disabled in + * guests/LPARs by default, so also skip if we are running in a guest. + */ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) || - have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + have_hwcap2(PPC_FEATURE2_ARCH_3_1) || + access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0); /* * Ensure we can ask for PROT_SAO. From 675bceb097e6fb9769309093ec6f3d33a2fed9cc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 Sep 2020 09:31:22 +0530 Subject: [PATCH 14/18] powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc The test is broken w.r.t page table update rules and results in kernel crash as below. Disable the support until we get the tests updated. [ 21.083519] kernel BUG at arch/powerpc/mm/pgtable.c:304! cpu 0x0: Vector: 700 (Program Check) at [c000000c6d1e76c0] pc: c00000000009a5ec: assert_pte_locked+0x14c/0x380 lr: c0000000005eeeec: pte_update+0x11c/0x190 sp: c000000c6d1e7950 msr: 8000000002029033 current = 0xc000000c6d172c80 paca = 0xc000000003ba0000 irqmask: 0x03 irq_happened: 0x01 pid = 1, comm = swapper/0 kernel BUG at arch/powerpc/mm/pgtable.c:304! [link register ] c0000000005eeeec pte_update+0x11c/0x190 [c000000c6d1e7950] 0000000000000001 (unreliable) [c000000c6d1e79b0] c0000000005eee14 pte_update+0x44/0x190 [c000000c6d1e7a10] c000000001a2ca9c pte_advanced_tests+0x160/0x3d8 [c000000c6d1e7ab0] c000000001a2d4fc debug_vm_pgtable+0x7e8/0x1338 [c000000c6d1e7ba0] c0000000000116ec do_one_initcall+0xac/0x5f0 [c000000c6d1e7c80] c0000000019e4fac kernel_init_freeable+0x4dc/0x5a4 [c000000c6d1e7db0] c000000000012474 kernel_init+0x24/0x160 [c000000c6d1e7e20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c With DEBUG_VM disabled [ 20.530152] BUG: Kernel NULL pointer dereference on read at 0x00000000 [ 20.530183] Faulting instruction address: 0xc0000000000df330 cpu 0x33: Vector: 380 (Data SLB Access) at [c000000c6d19f700] pc: c0000000000df330: memset+0x68/0x104 lr: c00000000009f6d8: hash__pmdp_huge_get_and_clear+0xe8/0x1b0 sp: c000000c6d19f990 msr: 8000000002009033 dar: 0 current = 0xc000000c6d177480 paca = 0xc00000001ec4f400 irqmask: 0x03 irq_happened: 0x01 pid = 1, comm = swapper/0 [link register ] c00000000009f6d8 hash__pmdp_huge_get_and_clear+0xe8/0x1b0 [c000000c6d19f990] c00000000009f748 hash__pmdp_huge_get_and_clear+0x158/0x1b0 (unreliable) [c000000c6d19fa10] c0000000019ebf30 pmd_advanced_tests+0x1f0/0x378 [c000000c6d19fab0] c0000000019ed088 debug_vm_pgtable+0x79c/0x1244 [c000000c6d19fba0] c0000000000116ec do_one_initcall+0xac/0x5f0 [c000000c6d19fc80] c0000000019a4fac kernel_init_freeable+0x4dc/0x5a4 [c000000c6d19fdb0] c000000000012474 kernel_init+0x24/0x160 [c000000c6d19fe20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c 33:mon> Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902040122.136414-1-aneesh.kumar@linux.ibm.com --- Documentation/features/debug/debug-vm-pgtable/arch-support.txt | 2 +- arch/powerpc/Kconfig | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 53da483c8326..1c49723e7534 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -22,7 +22,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | ok | + | powerpc: | TODO | | riscv: | ok | | s390: | ok | | sh: | TODO | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 65bed1fdeaad..787e829b6f25 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -116,7 +116,6 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL - select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE From 4c62285439f80f8996c38e0bda79b1125a192365 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Sep 2020 22:14:39 +1000 Subject: [PATCH 15/18] Revert "powerpc/build: vdso linker warning for orphan sections" This reverts commit f2af201002a8bc22500c04cc474ea480bf361351. It added a usage of cc-ldoption, but cc-ldoption was removed in commit 055efab3120b ("kbuild: drop support for cc-ldoption"). Reported-by: Nick Desaulniers Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso32/Makefile | 2 +- arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 - arch/powerpc/kernel/vdso64/Makefile | 2 +- arch/powerpc/kernel/vdso64/vdso64.lds.S | 3 +-- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 87ab1152d5ce..e147bbdc12cd 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -50,7 +50,7 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 4c985467a668..5206c2eb2a1d 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -111,7 +111,6 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 38c317f25141..32ebb3522ea1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -34,7 +34,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 4e3a8d4ee614..256fb9720298 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -30,7 +30,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr) + *(.sfpr .glink) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -111,7 +111,6 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.glink .iplt .plt .rela*) } } From 437ef802e0adc9f162a95213a3488e8646e5fc03 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 8 Sep 2020 11:51:06 +1000 Subject: [PATCH 16/18] powerpc/dma: Fix dma_map_ops::get_required_mask There are 2 problems with it: 1. "<" vs expected "<<" 2. the shift number is an IOMMU page number mask, not an address mask as the IOMMU page shift is missing. This did not hit us before f1565c24b596 ("powerpc: use the generic dma_ops_bypass mode") because we had additional code to handle bypass mask so this chunk (almost?) never executed.However there were reports that aacraid does not work with "iommu=nobypass". After f1565c24b596, aacraid (and probably others which call dma_get_required_mask() before setting the mask) was unable to enable 64bit DMA and fall back to using IOMMU which was known not to work, one of the problems is double free of an IOMMU page. This fixes DMA for aacraid, both with and without "iommu=nobypass" in the kernel command line. Verified with "stress-ng -d 4". Fixes: 6a5c7be5e484 ("powerpc: Override dma_get_required_mask by platform hook and ops") Cc: stable@vger.kernel.org # v3.2+ Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200908015106.79661-1-aik@ozlabs.ru --- arch/powerpc/kernel/dma-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 569fecd7b5b2..9053fc9d20c7 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -120,7 +120,8 @@ u64 dma_iommu_get_required_mask(struct device *dev) if (!tbl) return 0; - mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1); + mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) + + tbl->it_page_shift - 1); mask += mask - 1; return mask; From 1d3ee7df009a46440c58508b8819213c09503acd Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 3 Sep 2020 14:57:27 +0530 Subject: [PATCH 17/18] cpuidle: pseries: Fix CEDE latency conversion from tb to us Commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for CEDE(0)") sets the exit latency of CEDE(0) based on the latency values of the Extended CEDE states advertised by the platform. The values advertised by the platform are in timebase ticks. However the cpuidle framework requires the latency values in microseconds. If the tb-ticks value advertised by the platform correspond to a value smaller than 1us, during the conversion from tb-ticks to microseconds, in the current code, the result becomes zero. This is incorrect as it puts a CEDE state on par with the snooze state. This patch fixes this by rounding up the result obtained while converting the latency value from tb-ticks to microseconds. It also prints a warning in case we discover an extended-cede state with wakeup latency to be 0. In such a case, ensure that CEDE(0) has a non-zero wakeup latency. Fixes: d947fb4c965c ("cpuidle: pseries: Fixup exit latency for CEDE(0)") Signed-off-by: Gautham R. Shenoy Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1599125247-28488-1-git-send-email-ego@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-pseries.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index ff6d99e923a4..a2b5c6f60cf0 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -361,7 +361,10 @@ static void __init fixup_cede0_latency(void) for (i = 0; i < nr_xcede_records; i++) { struct xcede_latency_record *record = &payload->records[i]; u64 latency_tb = be64_to_cpu(record->latency_ticks); - u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC; + u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); + + if (latency_us == 0) + pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); if (latency_us < min_latency_us) min_latency_us = latency_us; @@ -378,10 +381,14 @@ static void __init fixup_cede0_latency(void) * Perform the fix-up. */ if (min_latency_us < dedicated_states[1].exit_latency) { - u64 cede0_latency = min_latency_us - 1; + /* + * We set a minimum of 1us wakeup latency for cede0 to + * distinguish it from snooze + */ + u64 cede0_latency = 1; - if (cede0_latency <= 0) - cede0_latency = min_latency_us; + if (min_latency_us > cede0_latency) + cede0_latency = min_latency_us - 1; dedicated_states[1].exit_latency = cede0_latency; dedicated_states[1].target_residency = 10 * (cede0_latency); From 0460534b532e5518c657c7d6492b9337d975eaa3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 7 Sep 2020 16:35:40 +0530 Subject: [PATCH 18/18] powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute The newly introduced 'perf_stats' attribute uses the default access mode of 0444, allowing non-root users to access performance stats of an nvdimm and potentially force the kernel into issuing a large number of expensive hypercalls. Since the information exposed by this attribute cannot be cached it is better to ward off access to this attribute from users who don't need to access to these performance statistics. Hence update the access mode of 'perf_stats' attribute to be only readable by root users. Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Reported-by: Aneesh Kumar K.V Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200907110540.21349-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f439f0dfea7d..a88a707a608a 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -822,7 +822,7 @@ free_stats: kfree(stats); return rc ? rc : seq_buf_used(&s); } -DEVICE_ATTR_RO(perf_stats); +DEVICE_ATTR_ADMIN_RO(perf_stats); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf)