From 90aff8d091c9c1fa46df45ddd8e73e4c6b6e2b0b Mon Sep 17 00:00:00 2001 From: Peng Donglin Date: Wed, 13 Jun 2018 09:11:56 +0800 Subject: [PATCH 01/90] ARM64: dump: Convert to use DEFINE_SHOW_ATTRIBUTE macro Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Peng Donglin Signed-off-by: Will Deacon --- arch/arm64/mm/ptdump_debugfs.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 02b18f8b2905..24d786fc3a4c 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -10,18 +10,7 @@ static int ptdump_show(struct seq_file *m, void *v) ptdump_walk_pgd(m, info); return 0; } - -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, inode->i_private); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); int ptdump_debugfs_register(struct ptdump_info *info, const char *name) { From 1201a5a25cf0f7d8019d106a81c64f3a7397d17c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Jun 2018 11:45:36 -0700 Subject: [PATCH 02/90] perf/arm-cci: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this removes the VLA in favor of a maximum size and adds a sanity check at registration time. The sizes are all explicitly enumerated already, so this just collects them into macros. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 0d09d8e669cd..1bfeb160c5b1 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -53,6 +53,16 @@ enum { CCI_IF_MAX, }; +#define NUM_HW_CNTRS_CII_4XX 4 +#define NUM_HW_CNTRS_CII_5XX 8 +#define NUM_HW_CNTRS_MAX NUM_HW_CNTRS_CII_5XX + +#define FIXED_HW_CNTRS_CII_4XX 1 +#define FIXED_HW_CNTRS_CII_5XX 0 +#define FIXED_HW_CNTRS_MAX FIXED_HW_CNTRS_CII_4XX + +#define HW_CNTRS_MAX (NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX) + struct event_range { u32 min; u32 max; @@ -633,8 +643,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; - - DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(mask, HW_CNTRS_MAX); bitmap_zero(mask, cci_pmu->num_cntrs); for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { @@ -940,7 +949,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; - DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX); bitmap_zero(saved_mask, cci_pmu->num_cntrs); pmu_save_counters(cci_pmu, saved_mask); @@ -1245,7 +1254,7 @@ static int validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; + unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)]; struct cci_pmu_hw_events fake_pmu = { /* * Initialise the fake PMU. 
We only need to populate the @@ -1403,6 +1412,11 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) char *name = model->name; u32 num_cntrs; + if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX)) + return -EINVAL; + if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX)) + return -EINVAL; + pmu_event_attr_group.attrs = model->event_attrs; pmu_format_attr_group.attrs = model->format_attrs; @@ -1455,8 +1469,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI400_PMU [CCI400_R0] = { .name = "CCI_400", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r0_pmu_event_attrs, @@ -1475,8 +1489,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI400_R1] = { .name = "CCI_400_r1", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r1_pmu_event_attrs, @@ -1497,8 +1511,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI5xx_PMU [CCI500_R0] = { .name = "CCI_500", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, @@ -1521,8 +1535,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI550_R0] = { .name = "CCI_550", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, From 59b62e7ad0874681dc4f84cac90cc991dc265809 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 4 Jul 2018 11:50:50 +0100 Subject: [PATCH 03/90] drivers/perf: Initialise return value in armpmu_request_irqs() If a PMU doesn't have any IRQs, we should return 0 from armpmu_request_irqs(), rather than uninitialised stack. 
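
As a minimal standalone sketch of the failure mode (stand-in names, not the driver code itself): when the loop body never runs, only an explicit initialiser gives the caller a defined value.

	/* Sketch only: request_one_irq() is a stand-in for the per-CPU request. */
	static int request_one_irq(int irq) { (void)irq; return 0; }

	static int request_all_irqs(int nr_irqs)
	{
		int i, err = 0;	/* without "= 0", err is stack garbage when nr_irqs == 0 */

		for (i = 0; i < nr_irqs; i++) {
			err = request_one_irq(i);
			if (err)
				break;
		}

		return err;
	}
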
Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 971ff336494a..96075cecb0ae 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -160,7 +160,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) static int armpmu_request_irqs(struct arm_pmu *armpmu) { struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int cpu, err; + int cpu, err = 0; for_each_cpu(cpu, &armpmu->supported_cpus) { int irq = per_cpu(hw_events->irq, cpu); From 7e7df71fd57ff2894d96abb0080922bf39460a79 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 14 Jun 2018 14:58:21 -0400 Subject: [PATCH 04/90] arm64: fix infinite stacktrace I've got this infinite stacktrace when debugging another problem: [ 908.795225] INFO: rcu_preempt detected stalls on CPUs/tasks: [ 908.796176] 1-...!: (1 GPs behind) idle=952/1/4611686018427387904 softirq=1462/1462 fqs=355 [ 908.797692] 2-...!: (1 GPs behind) idle=f42/1/4611686018427387904 softirq=1550/1551 fqs=355 [ 908.799189] (detected by 0, t=2109 jiffies, g=130, c=129, q=235) [ 908.800284] Task dump for CPU 1: [ 908.800871] kworker/1:1 R running task 0 32 2 0x00000022 [ 908.802127] Workqueue: writecache-writeabck writecache_writeback [dm_writecache] [ 908.820285] Call trace: [ 908.824785] __switch_to+0x68/0x90 [ 908.837661] 0xfffffe00603afd90 [ 908.844119] 0xfffffe00603afd90 [ 908.850091] 0xfffffe00603afd90 [ 908.854285] 0xfffffe00603afd90 [ 908.863538] 0xfffffe00603afd90 [ 908.865523] 0xfffffe00603afd90 The machine just locked up and kept on printing the same line over and over again. This patch fixes it. Signed-off-by: Mikulas Patocka Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d5718a060672..e160ca123da3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -56,6 +56,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + if (frame->fp <= fp) + return -EINVAL; + #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { From 76f4e2da45b44bf70f61c28fcbc91668492463e0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Jul 2018 14:17:53 +0100 Subject: [PATCH 05/90] arm64: kexec: always reset to EL2 if present Currently machine_kexec() doesn't reset to EL2 in the case of a crashdump kernel. This leaves potentially dodgy state active at EL2, and means that if the crashdump kernel attempts to online secondary CPUs, these will be booted as mismatched ELs. Let's reset to EL2, as we do in all other cases, and simplify things. If EL2 state is corrupt, things are already sufficiently bad that kdump is unlikely to work, and it's best-effort regardless. 
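
In outline, the EL2 decision now lives entirely inside cpu_soft_restart(); a sketch of the predicate follows (the helper name is illustrative, the logic matches the hunk below):

	#include <linux/types.h>
	#include <asm/virt.h>

	/* Sketch: drop the reboot code to EL2 whenever EL2 exists and the kernel
	 * is not already running there (VHE); kdump is no longer special-cased.
	 */
	static inline bool should_switch_to_el2(void)
	{
		return !is_kernel_in_hyp_mode() && is_hyp_mode_available();
	}
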
Cc: Catalin Marinas Cc: James Morse Acked-by: Marc Zyngier Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu-reset.h | 9 +++++---- arch/arm64/kernel/machine_kexec.c | 3 +-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 6c2b1b4f57c9..fad90e4935fb 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -16,13 +16,14 @@ void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, unsigned long arg0, unsigned long arg1, unsigned long arg2); -static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, - unsigned long entry, unsigned long arg0, unsigned long arg1, - unsigned long arg2) +static inline void __noreturn cpu_soft_restart(unsigned long entry, + unsigned long arg0, + unsigned long arg1, + unsigned long arg2) { typeof(__cpu_soft_restart) *restart; - el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + unsigned long el2_switch = !is_kernel_in_hyp_mode() && is_hyp_mode_available(); restart = (void *)__pa_symbol(__cpu_soft_restart); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index f76ea92dff91..f62effc6e064 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -207,8 +207,7 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. */ - cpu_soft_restart(kimage != kexec_crash_image, - reboot_code_buffer_phys, kimage->head, kimage->start, 0); + cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } From 598865c5f32d6e11e99f2aac07348e5fd17cdc03 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 31 Jan 2018 15:55:24 +0000 Subject: [PATCH 06/90] arm64: barrier: Implement smp_cond_load_relaxed We can provide an implementation of smp_cond_load_relaxed using READ_ONCE and __cmpwait_relaxed. Signed-off-by: Will Deacon --- arch/arm64/include/asm/barrier.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index f11518af96a9..822a9192c551 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -128,6 +128,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_relaxed(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #define smp_cond_load_acquire(ptr, cond_expr) \ ({ \ typeof(ptr) __PTR = (ptr); \ From c11090474d70590170cf5fa6afe85864ab494b37 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Mar 2018 20:45:45 +0000 Subject: [PATCH 07/90] arm64: locking: Replace ticket lock implementation with qspinlock It's fair to say that our ticket lock has served us well over time, but it's time to bite the bullet and start using the generic qspinlock code so we can make use of explicit MCS queuing and potentially better PV performance in future. 
Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/arm64/include/asm/spinlock.h | 117 +----------------------- arch/arm64/include/asm/spinlock_types.h | 17 +--- 4 files changed, 4 insertions(+), 132 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 42c090cf0292..facd19625563 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -44,6 +44,7 @@ config ARM64 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 3a9b84d39d71..6cd5d77b6b44 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += mm-arch-hooks.h generic-y += msi.h generic-y += preempt.h generic-y += qrwlock.h +generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += serial.h diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 26c5bd7d88d8..38116008d18b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -16,123 +16,8 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H -#include -#include -#include - -/* - * Spinlock implementation. - * - * The memory barriers are implicit with the load-acquire and store-release - * instructions. - */ - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned int tmp; - arch_spinlock_t lockval, newval; - - asm volatile( - /* Atomically increment the next ticket. */ - ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ -" prfm pstl1strm, %3\n" -"1: ldaxr %w0, %3\n" -" add %w1, %w0, %w5\n" -" stxr %w2, %w1, %3\n" -" cbnz %w2, 1b\n", - /* LSE atomics */ -" mov %w2, %w5\n" -" ldadda %w2, %w0, %3\n" - __nops(3) - ) - - /* Did we get the lock? */ -" eor %w1, %w0, %w0, ror #16\n" -" cbz %w1, 3f\n" - /* - * No: spin on the owner. Send a local event to avoid missing an - * unlock before the exclusive load. - */ -" sevl\n" -"2: wfe\n" -" ldaxrh %w2, %4\n" -" eor %w1, %w2, %w0, lsr #16\n" -" cbnz %w1, 2b\n" - /* We got the lock. Critical section starts here. 
*/ -"3:" - : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) - : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) - : "memory"); -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned int tmp; - arch_spinlock_t lockval; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldaxr %w0, %2\n" - " eor %w1, %w0, %w0, ror #16\n" - " cbnz %w1, 2f\n" - " add %w0, %w0, %3\n" - " stxr %w1, %w0, %2\n" - " cbnz %w1, 1b\n" - "2:", - /* LSE atomics */ - " ldr %w0, %2\n" - " eor %w1, %w0, %w0, ror #16\n" - " cbnz %w1, 1f\n" - " add %w1, %w0, %3\n" - " casa %w0, %w1, %2\n" - " sub %w1, %w1, %3\n" - " eor %w1, %w1, %w0\n" - "1:") - : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : "I" (1 << TICKET_SHIFT) - : "memory"); - - return !tmp; -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - unsigned long tmp; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " ldrh %w1, %0\n" - " add %w1, %w1, #1\n" - " stlrh %w1, %0", - /* LSE atomics */ - " mov %w1, #1\n" - " staddlh %w1, %0\n" - __nops(1)) - : "=Q" (lock->owner), "=&r" (tmp) - : - : "memory"); -} - -static inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return lock.owner == lock.next; -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - return !arch_spin_value_unlocked(READ_ONCE(*lock)); -} - -static inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - arch_spinlock_t lockval = READ_ONCE(*lock); - return (lockval.next - lockval.owner) > 1; -} -#define arch_spin_is_contended arch_spin_is_contended - #include +#include /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 6b856012c51b..a157ff465e27 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,22 +20,7 @@ # error "please don't include this file directly" #endif -#include - -#define TICKET_SHIFT 16 - -typedef struct { -#ifdef __AARCH64EB__ - u16 next; - u16 owner; -#else - u16 owner; - u16 next; -#endif -} __aligned(4) arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } - +#include #include #endif From 5d168964aece0b4a41269839c613683c5d7e0fb2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Mar 2018 21:17:01 +0000 Subject: [PATCH 08/90] arm64: kconfig: Ensure spinlock fastpaths are inlined if !PREEMPT When running with CONFIG_PREEMPT=n, the spinlock fastpaths fit inside 64 bytes, which typically coincides with the L1 I-cache line size. Inline the spinlock fastpaths, like we do already for rwlocks. 
Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index facd19625563..476de9b1d239 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -42,6 +42,16 @@ config ARM64 select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_LOCK if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS From 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 4 Jul 2018 23:07:45 +0100 Subject: [PATCH 09/90] arm64: Fix mismatched cache line size detection If there is a mismatch in the I/D min line size, we must always use the system wide safe value both in applications and in the kernel, while performing cache operations. However, we have been checking more bits than just the min line sizes, which triggers false negatives. We may need to trap the user accesses in such cases, but not necessarily patch the kernel. This patch fixes the check to do the right thing as advertised. A new capability will be added to check mismatches in other fields and ensure we trap the CTR accesses. Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") Cc: Cc: Mark Rutland Cc: Catalin Marinas Reported-by: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 6 ++++-- arch/arm64/kernel/cpufeature.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5df5cfe1c143..5ee5bca8c24b 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -21,12 +21,16 @@ #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 +#define CTR_IMINLINE_SHIFT 0 #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 #define CTR_IDC_SHIFT 28 #define CTR_DIC_SHIFT 29 +#define CTR_CACHE_MINLINE_MASK \ + (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define ICACHE_POLICY_VPIPT 0 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1d2b6d768efe..5d1fa928ea4b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,9 +68,11 @@ static bool has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, int scope) { + u64 mask = CTR_CACHE_MINLINE_MASK; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != - (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); + return (read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask); } static void diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f24892a40d2c..25d5cef00333 100644 --- a/arch/arm64/kernel/cpufeature.c +++ 
b/arch/arm64/kernel/cpufeature.c @@ -214,7 +214,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = { * If we have differing I-cache policies, report it as the weakest - VIPT. */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; From 314d53d297980676011e6fd83dac60db4a01dc70 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 4 Jul 2018 23:07:46 +0100 Subject: [PATCH 10/90] arm64: Handle mismatched cache type Track mismatches in the cache type register (CTR_EL0), other than the D/I min line sizes and trap user accesses if there are any. Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") Cc: Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8a699c708fc9..be3bf3d08916 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -49,7 +49,8 @@ #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 #define ARM64_SSBD 30 +#define ARM64_MISMATCHED_CACHE_TYPE 31 -#define ARM64_NCAPS 31 +#define ARM64_NCAPS 32 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5d1fa928ea4b..5d59ff9a8da9 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -65,11 +65,15 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) } static bool -has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, - int scope) +has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, + int scope) { u64 mask = CTR_CACHE_MINLINE_MASK; + /* Skip matching the min line sizes for cache type check */ + if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) + mask ^= arm64_ftr_reg_ctrel0.strict_mask; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return (read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask); @@ -615,7 +619,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_line_size, + .matches = has_mismatched_cache_type, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, + }, + { + .desc = "Mismatched cache type", + .capability = ARM64_MISMATCHED_CACHE_TYPE, + .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = cpu_enable_trap_ctr_access, }, From 25086263425641c74123f9387426c23072b299ea Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:48 +0100 Subject: [PATCH 11/90] arm64: add PSR_AA32_* definitions The AArch32 CPSR/SPSR format is *almost* identical to the AArch64 SPSR_ELx format for exceptions taken from AArch32, but the two have diverged with the addition of DIT, and we need to treat the two as logically distinct. This patch adds new definitions for the SPSR_ELx format for exceptions taken from AArch32, with a consistent PSR_AA32_ prefix. The existing COMPAT_PSR_ definitions will be used for the PSR format as seen from AArch32. 
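
For illustration (a sketch using the bit values this patch introduces, not the final helpers): DIT sits at bit 21 in the AArch32 CPSR but at bit 24 in the SPSR_ELx layout, so the two views can no longer be copied bit-for-bit.

	/* Sketch: constants mirror COMPAT_PSR_DIT_BIT / PSR_AA32_DIT_BIT below. */
	#define AA32_VIEW_DIT	0x00200000UL	/* DIT as seen from AArch32   */
	#define SPSR_ELX_DIT	0x01000000UL	/* DIT in the SPSR_ELx layout */

	static unsigned long aa32_psr_to_spsr_elx(unsigned long psr)
	{
		unsigned long spsr = psr & ~AA32_VIEW_DIT;

		if (psr & AA32_VIEW_DIT)
			spsr |= SPSR_ELX_DIT;
		return spsr;
	}
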
Definitions of DIT are provided for both, and inline functions are provided to map between the two formats. Note that for SPSR_ELx, the (RES0) J bit has been re-allocated as the DIT bit. Once users of the COMPAT_PSR definitions have been migrated over to the PSR_AA32 definitions, the (majority of) the former will be removed, so no efforts is made to avoid duplication until then. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Christoffer Dall Cc: Marc Zyngier Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 57 ++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 6069d66e0bc2..1b2a253de6a1 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,7 +35,37 @@ #define COMPAT_PTRACE_GETHBPREGS 29 #define COMPAT_PTRACE_SETHBPREGS 30 -/* AArch32 CPSR bits */ +/* SPSR_ELx bits for exceptions taken from AArch32 */ +#define PSR_AA32_MODE_MASK 0x0000001f +#define PSR_AA32_MODE_USR 0x00000010 +#define PSR_AA32_MODE_FIQ 0x00000011 +#define PSR_AA32_MODE_IRQ 0x00000012 +#define PSR_AA32_MODE_SVC 0x00000013 +#define PSR_AA32_MODE_ABT 0x00000017 +#define PSR_AA32_MODE_HYP 0x0000001a +#define PSR_AA32_MODE_UND 0x0000001b +#define PSR_AA32_MODE_SYS 0x0000001f +#define PSR_AA32_T_BIT 0x00000020 +#define PSR_AA32_F_BIT 0x00000040 +#define PSR_AA32_I_BIT 0x00000080 +#define PSR_AA32_A_BIT 0x00000100 +#define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_DIT_BIT 0x01000000 +#define PSR_AA32_Q_BIT 0x08000000 +#define PSR_AA32_V_BIT 0x10000000 +#define PSR_AA32_C_BIT 0x20000000 +#define PSR_AA32_Z_BIT 0x40000000 +#define PSR_AA32_N_BIT 0x80000000 +#define PSR_AA32_IT_MASK 0x0600fc00 /* If-Then execution state mask */ +#define PSR_AA32_GE_MASK 0x000f0000 + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define PSR_AA32_ENDSTATE PSR_AA32_E_BIT +#else +#define PSR_AA32_ENDSTATE 0 +#endif + +/* AArch32 CPSR bits, as seen in AArch32 */ #define COMPAT_PSR_MODE_MASK 0x0000001f #define COMPAT_PSR_MODE_USR 0x00000010 #define COMPAT_PSR_MODE_FIQ 0x00000011 @@ -50,6 +80,7 @@ #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 #define COMPAT_PSR_E_BIT 0x00000200 +#define COMPAT_PSR_DIT_BIT 0x00200000 #define COMPAT_PSR_J_BIT 0x01000000 #define COMPAT_PSR_Q_BIT 0x08000000 #define COMPAT_PSR_V_BIT 0x10000000 @@ -111,6 +142,30 @@ #define compat_sp_fiq regs[29] #define compat_lr_fiq regs[30] +static inline unsigned long compat_psr_to_pstate(const unsigned long psr) +{ + unsigned long pstate; + + pstate = psr & ~COMPAT_PSR_DIT_BIT; + + if (psr & COMPAT_PSR_DIT_BIT) + pstate |= PSR_AA32_DIT_BIT; + + return pstate; +} + +static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) +{ + unsigned long psr; + + psr = pstate & ~PSR_AA32_DIT_BIT; + + if (pstate & PSR_AA32_DIT_BIT) + psr |= COMPAT_PSR_DIT_BIT; + + return psr; +} + /* * This struct defines the way the registers are stored on the stack during an * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for From 1265132127b63502d34e0f58c8bdef3a4dc927c2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:49 +0100 Subject: [PATCH 12/90] arm64: don't zero DIT on signal return Currently valid_user_regs() treats SPSR_ELx.DIT as a RES0 bit, causing it to be zeroed upon exception return, rather than preserved. Thus, code relying on DIT will not function as expected, and may expose an unexpected timing sidechannel. 
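
As a standalone arithmetic illustration (plain userspace C, with the mask values copied from the hunk below): the old RES0 mask covers bit 24 (DIT) while the new one leaves it out.

	#include <stdio.h>

	/* Same bit arithmetic as the kernel's GENMASK_ULL(), reimplemented here
	 * so the example builds on its own.
	 */
	#define GENMASK_ULL(h, l) \
		(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

	int main(void)
	{
		unsigned long long dit = 1ULL << 24;
		unsigned long long old_res0 = GENMASK_ULL(27, 22);
		unsigned long long new_res0 = GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22);

		printf("old mask clears DIT: %d\n", !!(old_res0 & dit));	/* prints 1 */
		printf("new mask clears DIT: %d\n", !!(new_res0 & dit));	/* prints 0 */
		return 0;
	}
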
Let's remove DIT from the set of RES0 bits, such that it is preserved. At the same time, the related comment is updated to better describe the situation, and to take into account the most recent documentation of SPSR_ELx, in ARM DDI 0487C.a. Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5c338ce5a7fa..5df75ab6a3c8 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1659,15 +1659,19 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) } /* - * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a + * We also take into account DIT (bit 24), which is not yet documented, and + * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be + * allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. + * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ - GENMASK_ULL(5, 5)) + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) static int valid_compat_regs(struct user_pt_regs *regs) { From 25dc2c80cfa33153057aa94984855acd57adf92a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:50 +0100 Subject: [PATCH 13/90] arm64: compat: map SPSR_ELx<->PSR for signals The SPSR_ELx format for exceptions taken from AArch32 differs from the AArch32 PSR format. Thus, we must translate between the two when setting up a compat sigframe, or restoring context from a compat sigframe. Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/signal32.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 77b91f478995..12aec0a66376 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -243,6 +243,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, int err; sigset_t set; struct compat_aux_sigframe __user *aux; + unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); if (err == 0) { @@ -266,7 +267,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __get_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); + + regs->pstate = compat_psr_to_pstate(psr); /* * Avoid compat_sys_sigreturn() restarting. 
@@ -414,6 +417,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct compat_aux_sigframe __user *aux; + unsigned long psr = pstate_to_compat_psr(regs->pstate); int err = 0; __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); @@ -432,7 +436,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __put_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ From 76fc52bd07d3e9cb708f1a50b60c825c96acd606 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:51 +0100 Subject: [PATCH 14/90] arm64: ptrace: map SPSR_ELx<->PSR for compat tasks The SPSR_ELx format for exceptions taken from AArch32 is slightly different to the AArch32 PSR format. Map between the two in the compat ptrace code. Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5df75ab6a3c8..2c4b09f84dd0 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1076,6 +1076,7 @@ static int compat_gpr_get(struct task_struct *target, break; case 16: reg = task_pt_regs(target)->pstate; + reg = pstate_to_compat_psr(reg); break; case 17: reg = task_pt_regs(target)->orig_x0; @@ -1143,6 +1144,7 @@ static int compat_gpr_set(struct task_struct *target, newregs.pc = reg; break; case 16: + reg = compat_psr_to_pstate(reg); newregs.pstate = reg; break; case 17: From d64567f67835736d65086e9bfc41a19b2863c32e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:52 +0100 Subject: [PATCH 15/90] arm64: use PSR_AA32 definitions Some code cares about the SPSR_ELx format for exceptions taken from AArch32 to inspect or manipulate the SPSR_ELx value, which is already in the SPSR_ELx format, and not in the AArch32 PSR format. To separate these from cases where we care about the AArch32 PSR format, migrate these cases to use the PSR_AA32_* definitions rather than COMPAT_PSR_*. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 6 +++--- arch/arm64/include/asm/ptrace.h | 2 +- arch/arm64/kernel/armv8_deprecated.c | 24 ++++++++++++------------ arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/process.c | 20 ++++++++++---------- arch/arm64/kernel/ptrace.c | 20 ++++++++++---------- arch/arm64/kernel/signal32.c | 10 +++++----- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index a73ae1e49200..e02612105d78 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -182,12 +182,12 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { start_thread_common(regs, pc); - regs->pstate = COMPAT_PSR_MODE_USR; + regs->pstate = PSR_AA32_MODE_USR; if (pc & 1) - regs->pstate |= COMPAT_PSR_T_BIT; + regs->pstate |= PSR_AA32_T_BIT; #ifdef __AARCH64EB__ - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; #endif regs->compat_sp = sp; diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 1b2a253de6a1..d55237eb89bb 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -211,7 +211,7 @@ static inline void forget_syscall(struct pt_regs *regs) #ifdef CONFIG_COMPAT #define compat_thumb_mode(regs) \ - (((regs)->pstate & COMPAT_PSR_T_BIT)) + (((regs)->pstate & PSR_AA32_T_BIT)) #else #define compat_thumb_mode(regs) (0) #endif diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index d4707abb2f16..d2c0938ccaec 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -441,8 +441,8 @@ static struct undef_hook swp_hooks[] = { { .instr_mask = 0x0fb00ff0, .instr_val = 0x01000090, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = swp_handler }, { } @@ -521,15 +521,15 @@ static struct undef_hook cp15_barrier_hooks[] = { { .instr_mask = 0x0fff0fdf, .instr_val = 0x0e070f9a, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { .instr_mask = 0x0fff0fff, .instr_val = 0x0e070f95, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { } @@ -562,10 +562,10 @@ static int compat_setend_handler(struct pt_regs *regs, u32 big_endian) if (big_endian) { insn = "setend be"; - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; } else { insn = "setend le"; - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } trace_instruction_emulation(insn, regs->pc); @@ -593,16 +593,16 @@ static struct undef_hook setend_hooks[] = { { .instr_mask = 0xfffffdff, .instr_val = 0xf1010000, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = a32_setend_handler, }, { /* Thumb mode */ .instr_mask = 0x0000fff7, .instr_val = 0x0000b650, - .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), - .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), + .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK), + .pstate_val = (PSR_AA32_T_BIT | 
PSR_AA32_MODE_USR), .fn = t16_setend_handler, }, {} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 25d5cef00333..4b3dc9fc9c68 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1723,7 +1723,7 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn) static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, - .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_mask = PSR_AA32_MODE_MASK, .pstate_val = PSR_MODE_EL0t, .fn = emulate_mrs, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e10bc363f533..740b31f77ade 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -177,16 +177,16 @@ static void print_pstate(struct pt_regs *regs) if (compat_user_mode(regs)) { printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n", pstate, - pstate & COMPAT_PSR_N_BIT ? 'N' : 'n', - pstate & COMPAT_PSR_Z_BIT ? 'Z' : 'z', - pstate & COMPAT_PSR_C_BIT ? 'C' : 'c', - pstate & COMPAT_PSR_V_BIT ? 'V' : 'v', - pstate & COMPAT_PSR_Q_BIT ? 'Q' : 'q', - pstate & COMPAT_PSR_T_BIT ? "T32" : "A32", - pstate & COMPAT_PSR_E_BIT ? "BE" : "LE", - pstate & COMPAT_PSR_A_BIT ? 'A' : 'a', - pstate & COMPAT_PSR_I_BIT ? 'I' : 'i', - pstate & COMPAT_PSR_F_BIT ? 'F' : 'f'); + pstate & PSR_AA32_N_BIT ? 'N' : 'n', + pstate & PSR_AA32_Z_BIT ? 'Z' : 'z', + pstate & PSR_AA32_C_BIT ? 'C' : 'c', + pstate & PSR_AA32_V_BIT ? 'V' : 'v', + pstate & PSR_AA32_Q_BIT ? 'Q' : 'q', + pstate & PSR_AA32_T_BIT ? "T32" : "A32", + pstate & PSR_AA32_E_BIT ? "BE" : "LE", + pstate & PSR_AA32_A_BIT ? 'A' : 'a', + pstate & PSR_AA32_I_BIT ? 'I' : 'i', + pstate & PSR_AA32_F_BIT ? 'F' : 'f'); } else { printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n", pstate, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2c4b09f84dd0..42ff28aa5a00 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1681,15 +1681,15 @@ static int valid_compat_regs(struct user_pt_regs *regs) if (!system_supports_mixed_endian_el0()) { if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; else - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) && - (regs->pstate & COMPAT_PSR_A_BIT) == 0 && - (regs->pstate & COMPAT_PSR_I_BIT) == 0 && - (regs->pstate & COMPAT_PSR_F_BIT) == 0) { + (regs->pstate & PSR_AA32_A_BIT) == 0 && + (regs->pstate & PSR_AA32_I_BIT) == 0 && + (regs->pstate & PSR_AA32_F_BIT) == 0) { return 1; } @@ -1697,11 +1697,11 @@ static int valid_compat_regs(struct user_pt_regs *regs) * Force PSR to a valid 32-bit EL0t, preserving the same bits as * arch/arm. 
*/ - regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT | - COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT | - COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK | - COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT | - COMPAT_PSR_T_BIT; + regs->pstate &= PSR_AA32_N_BIT | PSR_AA32_Z_BIT | + PSR_AA32_C_BIT | PSR_AA32_V_BIT | + PSR_AA32_Q_BIT | PSR_AA32_IT_MASK | + PSR_AA32_GE_MASK | PSR_AA32_E_BIT | + PSR_AA32_T_BIT; regs->pstate |= PSR_MODE32_BIT; return 0; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 12aec0a66376..fca761be18c8 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -375,22 +375,22 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, { compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); compat_ulong_t retcode; - compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT); + compat_ulong_t spsr = regs->pstate & ~(PSR_f | PSR_AA32_E_BIT); int thumb; /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; if (thumb) - spsr |= COMPAT_PSR_T_BIT; + spsr |= PSR_AA32_T_BIT; else - spsr &= ~COMPAT_PSR_T_BIT; + spsr &= ~PSR_AA32_T_BIT; /* The IT state must be cleared for both ARM and Thumb-2 */ - spsr &= ~COMPAT_PSR_IT_MASK; + spsr &= ~PSR_AA32_IT_MASK; /* Restore the original endianness */ - spsr |= COMPAT_PSR_ENDSTATE; + spsr |= PSR_AA32_ENDSTATE; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); From 256c0960b7b6453dc90a4e879da52ab76b4037f9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:53 +0100 Subject: [PATCH 16/90] kvm/arm: use PSR_AA32 definitions Some code cares about the SPSR_ELx format for exceptions taken from AArch32 to inspect or manipulate the SPSR_ELx value, which is already in the SPSR_ELx format, and not in the AArch32 PSR format. To separate these from cases where we care about the AArch32 PSR format, migrate these cases to use the PSR_AA32_* definitions rather than COMPAT_PSR_*. There should be no functional change as a result of this patch. Note that arm64 KVM does not support a compat KVM API, and always uses the SPSR_ELx format, even for AArch32 guests. 
Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/kvm_emulate.h | 14 +++++++------- arch/arm64/include/asm/kvm_emulate.h | 10 +++++----- arch/arm64/kvm/guest.c | 14 +++++++------- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +- arch/arm64/kvm/regmap.c | 22 +++++++++++----------- arch/arm64/kvm/reset.c | 4 ++-- virt/kvm/arm/aarch32.c | 20 ++++++++++---------- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6493bd479ddc..fe2fb1ddd771 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -26,13 +26,13 @@ #include /* arm64 compatibility macros */ -#define COMPAT_PSR_MODE_ABT ABT_MODE -#define COMPAT_PSR_MODE_UND UND_MODE -#define COMPAT_PSR_T_BIT PSR_T_BIT -#define COMPAT_PSR_I_BIT PSR_I_BIT -#define COMPAT_PSR_A_BIT PSR_A_BIT -#define COMPAT_PSR_E_BIT PSR_E_BIT -#define COMPAT_PSR_IT_MASK PSR_IT_MASK +#define PSR_AA32_MODE_ABT ABT_MODE +#define PSR_AA32_MODE_UND UND_MODE +#define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_I_BIT PSR_I_BIT +#define PSR_AA32_A_BIT PSR_A_BIT +#define PSR_AA32_E_BIT PSR_E_BIT +#define PSR_AA32_IT_MASK PSR_IT_MASK unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 1dab3a984608..0c97e45d1dc3 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -140,7 +140,7 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) { - *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; + *vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT; } /* @@ -190,8 +190,8 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) u32 mode; if (vcpu_mode_is_32bit(vcpu)) { - mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; - return mode > COMPAT_PSR_MODE_USR; + mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; + return mode > PSR_AA32_MODE_USR; } mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; @@ -329,7 +329,7 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { - *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; + *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; } else { u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); sctlr |= (1 << 25); @@ -340,7 +340,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); + return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 56a0260ceb11..cdd4d9d6d575 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -107,14 +107,14 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { - case COMPAT_PSR_MODE_USR: - case COMPAT_PSR_MODE_FIQ: - case COMPAT_PSR_MODE_IRQ: - case COMPAT_PSR_MODE_SVC: - case COMPAT_PSR_MODE_ABT: - case COMPAT_PSR_MODE_UND: + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_FIQ: + case PSR_AA32_MODE_IRQ: + case PSR_AA32_MODE_SVC: + case PSR_AA32_MODE_ABT: + case PSR_AA32_MODE_UND: case 
PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 39be799d0417..215c7c0eb3b0 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -27,7 +27,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT); + return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT); return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index eefe403a2e63..7a5173ea2276 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -112,22 +112,22 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) { unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; - unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; switch (mode) { - case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC: mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ break; - case COMPAT_PSR_MODE_ABT: + case PSR_AA32_MODE_ABT: mode = 4; break; - case COMPAT_PSR_MODE_UND: + case PSR_AA32_MODE_UND: mode = 5; break; - case COMPAT_PSR_MODE_SYS: + case PSR_AA32_MODE_SYS: mode = 0; /* SYS maps to USR */ break; @@ -143,13 +143,13 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) */ static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) { - unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; switch (mode) { - case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC; - case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT; - case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND; - case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ; - case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ; + case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC; + case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT; + case PSR_AA32_MODE_UND: return KVM_SPSR_UND; + case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ; + case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ; default: BUG(); } } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index a74311beda35..4e4aedaf7ab7 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,8 +42,8 @@ static const struct kvm_regs default_regs_reset = { }; static const struct kvm_regs default_regs_reset32 = { - .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | - COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), + .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | + PSR_AA32_I_BIT | PSR_AA32_F_BIT), }; static bool cpu_has_32bit_el1(void) diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index efc84cbe8277..5abbe9b3c652 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -108,9 +108,9 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); - if (is_arm || !(cpsr & COMPAT_PSR_IT_MASK)) + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) return; cond = (cpsr & 0xe000) >> 13; @@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) else itbits = (itbits << 1) & 0x1f; - cpsr &= ~COMPAT_PSR_IT_MASK; + cpsr &= ~PSR_AA32_IT_MASK; cpsr |= cond << 13; cpsr |= (itbits & 0x1c) << (10 - 2); cpsr |= 
(itbits & 0x3) << 25; @@ -138,7 +138,7 @@ void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { bool is_thumb; - is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); if (is_thumb && !is_wide_instr) *vcpu_pc(vcpu) += 2; else @@ -164,16 +164,16 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { unsigned long cpsr; unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - cpsr = mode | COMPAT_PSR_I_BIT; + cpsr = mode | PSR_AA32_I_BIT; if (sctlr & (1 << 30)) - cpsr |= COMPAT_PSR_T_BIT; + cpsr |= PSR_AA32_T_BIT; if (sctlr & (1 << 25)) - cpsr |= COMPAT_PSR_E_BIT; + cpsr |= PSR_AA32_E_BIT; *vcpu_cpsr(vcpu) = cpsr; @@ -192,7 +192,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) void kvm_inject_undef32(struct kvm_vcpu *vcpu) { - prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); + prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); } /* @@ -216,7 +216,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); *far = addr; From 7373fed2f258e640031387616a2275a6fc0d7231 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Jul 2018 15:16:54 +0100 Subject: [PATCH 17/90] arm64: remove unused COMPAT_PSR definitions Now that users have been migrated to PSR_AA32, kill the unused COMPAT_PSR definitions. The only difference we need a definition for is COMPAT_PSR_DIT_BIT, which differs from PSR_AA32_DIT_BIT. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index d55237eb89bb..177b851ca6d9 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -66,35 +66,7 @@ #endif /* AArch32 CPSR bits, as seen in AArch32 */ -#define COMPAT_PSR_MODE_MASK 0x0000001f -#define COMPAT_PSR_MODE_USR 0x00000010 -#define COMPAT_PSR_MODE_FIQ 0x00000011 -#define COMPAT_PSR_MODE_IRQ 0x00000012 -#define COMPAT_PSR_MODE_SVC 0x00000013 -#define COMPAT_PSR_MODE_ABT 0x00000017 -#define COMPAT_PSR_MODE_HYP 0x0000001a -#define COMPAT_PSR_MODE_UND 0x0000001b -#define COMPAT_PSR_MODE_SYS 0x0000001f -#define COMPAT_PSR_T_BIT 0x00000020 -#define COMPAT_PSR_F_BIT 0x00000040 -#define COMPAT_PSR_I_BIT 0x00000080 -#define COMPAT_PSR_A_BIT 0x00000100 -#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_DIT_BIT 0x00200000 -#define COMPAT_PSR_J_BIT 0x01000000 -#define COMPAT_PSR_Q_BIT 0x08000000 -#define COMPAT_PSR_V_BIT 0x10000000 -#define COMPAT_PSR_C_BIT 0x20000000 -#define COMPAT_PSR_Z_BIT 0x40000000 -#define COMPAT_PSR_N_BIT 0x80000000 -#define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ -#define COMPAT_PSR_GE_MASK 0x000f0000 - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT -#else -#define COMPAT_PSR_ENDSTATE 0 -#endif /* * These are 'magic' values for PTRACE_PEEKUSR that return info about where a From 3b8c9f1cdfc506e94e992ae66b68bbe416f89610 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 11 Jun 2018 14:22:09 +0100 Subject: [PATCH 18/90] arm64: IPI each CPU after invalidating the I-cache for kernel mappings When invalidating the instruction cache for a kernel mapping via flush_icache_range(), it is also necessary to flush the pipeline for other CPUs so that instructions fetched into the pipeline before the I-cache invalidation are discarded. For example, if module 'foo' is unloaded and then module 'bar' is loaded into the same area of memory, a CPU could end up executing instructions from 'foo' when branching into 'bar' if these instructions were fetched into the pipeline before 'foo' was unloaded. Whilst this is highly unlikely to occur in practice, particularly as any exception acts as a context-synchronizing operation, following the letter of the architecture requires us to execute an ISB on each CPU in order for the new instruction stream to be visible. 
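
A sketch of the caller-visible contract (the names dst, new_code and len are illustrative): once flush_icache_range() returns, no online CPU should keep executing stale instructions fetched from the patched range.

	#include <linux/string.h>
	#include <asm/cacheflush.h>

	static void patch_region(void *dst, const void *new_code, size_t len)
	{
		memcpy(dst, new_code, len);			/* publish the new instructions */
		flush_icache_range((unsigned long)dst,		/* clean D-cache to PoU, inval  */
				   (unsigned long)dst + len);	/* I-cache, then IPI for an ISB */
	}
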
Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 27 ++++++++++++++++++++++++++- arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/insn.c | 18 ++++-------------- arch/arm64/mm/cache.S | 4 ++-- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index d264a7274811..19844211a4e6 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -19,6 +19,7 @@ #ifndef __ASM_CACHEFLUSH_H #define __ASM_CACHEFLUSH_H +#include #include /* @@ -71,7 +72,7 @@ * - kaddr - page address * - size - region size */ -extern void flush_icache_range(unsigned long start, unsigned long end); +extern void __flush_icache_range(unsigned long start, unsigned long end); extern int invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __inval_dcache_area(void *addr, size_t len); @@ -81,6 +82,30 @@ extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + __flush_icache_range(start, end); + + /* + * IPI all online CPUs so that they undergo a context synchronization + * event and are forced to refetch the new instructions. + */ +#ifdef CONFIG_KGDB + /* + * KGDB performs cache maintenance with interrupts disabled, so we + * will deadlock trying to IPI the secondary CPUs. In theory, we can + * set CACHE_FLUSH_IS_SAFE to 0 to avoid this known issue, but that + * just means that KGDB will elide the maintenance altogether! As it + * turns out, KGDB uses IPIs to round-up the secondary CPUs during + * the patching operation, so we don't need extra IPIs here anyway. + * In which case, add a KGDB-specific bodge and return early. + */ + if (kgdb_connected && irqs_disabled()) + return; +#endif + kick_all_cpus_sync(); +} + static inline void flush_cache_mm(struct mm_struct *mm) { } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5d59ff9a8da9..459129712dfa 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -107,7 +107,7 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, for (i = 0; i < SZ_2K; i += 0x80) memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); + __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } static void __install_bp_hardening_cb(bp_hardening_cb_t fn, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 816d03c4c913..0f6a2e0cfde0 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -216,8 +216,8 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - flush_icache_range((uintptr_t)tp, - (uintptr_t)tp + AARCH64_INSN_SIZE); + __flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; } @@ -283,18 +283,8 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) if (ret) return ret; - if (aarch64_insn_hotpatch_safe(insn, insns[0])) { - /* - * ARMv8 architecture doesn't guarantee all CPUs see - * the new instruction after returning from function - * aarch64_insn_patch_text_nosync(). 
So send IPIs to - * all other CPUs to achieve instruction - * synchronization. - */ - ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); - kick_all_cpus_sync(); - return ret; - } + if (aarch64_insn_hotpatch_safe(insn, insns[0])) + return aarch64_insn_patch_text_nosync(addrs[0], insns[0]); } return aarch64_insn_patch_text_sync(addrs, insns, cnt); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 30334d81b021..0c22ede52f90 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -35,7 +35,7 @@ * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(flush_icache_range) +ENTRY(__flush_icache_range) /* FALLTHROUGH */ /* @@ -77,7 +77,7 @@ alternative_else_nop_endif 9: mov x0, #-EFAULT b 1b -ENDPROC(flush_icache_range) +ENDPROC(__flush_icache_range) ENDPROC(__flush_cache_user_range) /* From 693350a7998018391852c48f68956cf0f855b2b9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 17:55:28 +0100 Subject: [PATCH 19/90] arm64: insn: Don't fallback on nosync path for general insn patching Patching kernel instructions at runtime requires other CPUs to undergo a context synchronisation event via an explicit ISB or an IPI in order to ensure that the new instructions are visible. This is required even for "hotpatch" instructions such as NOP and BL, so avoid optimising in this case and always go via stop_machine() when performing general patching. ftrace isn't quite as strict, so it can continue to call the nosync code directly. Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 2 -- arch/arm64/kernel/insn.c | 56 +---------------------------------- 2 files changed, 1 insertion(+), 57 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index f62c56b1793f..c6802dea6cab 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -446,8 +446,6 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); -bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); - int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 0f6a2e0cfde0..2b3413549734 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -149,20 +149,6 @@ int __kprobes aarch64_insn_write(void *addr, u32 insn) return __aarch64_insn_write(addr, cpu_to_le32(insn)); } -static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) -{ - if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) - return false; - - return aarch64_insn_is_b(insn) || - aarch64_insn_is_bl(insn) || - aarch64_insn_is_svc(insn) || - aarch64_insn_is_hvc(insn) || - aarch64_insn_is_smc(insn) || - aarch64_insn_is_brk(insn) || - aarch64_insn_is_nop(insn); -} - bool __kprobes aarch64_insn_uses_literal(u32 insn) { /* ldr/ldrsw (literal), prfm */ @@ -189,22 +175,6 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_bcond(insn); } -/* - * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a - * Section B2.6.5 "Concurrent modification and execution of instructions": - * Concurrent modification and execution of instructions can lead to the - * resulting instruction performing any behavior that can be achieved by - * executing any sequence of instructions that can be executed from the - * same Exception level, except where the instruction before 
modification - * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, - * or SMC instruction. - */ -bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) -{ - return __aarch64_insn_hotpatch_safe(old_insn) && - __aarch64_insn_hotpatch_safe(new_insn); -} - int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) { u32 *tp = addr; @@ -239,11 +209,6 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); - /* - * aarch64_insn_patch_text_nosync() calls flush_icache_range(), - * which ends with "dsb; isb" pair guaranteeing global - * visibility. - */ /* Notify other processors with an additional increment. */ atomic_inc(&pp->cpu_count); } else { @@ -255,8 +220,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) return ret; } -static -int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) { struct aarch64_insn_patch patch = { .text_addrs = addrs, @@ -272,24 +236,6 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) cpu_online_mask); } -int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) -{ - int ret; - u32 insn; - - /* Unsafe to patch multiple instructions without synchronizaiton */ - if (cnt == 1) { - ret = aarch64_insn_read(addrs[0], &insn); - if (ret) - return ret; - - if (aarch64_insn_hotpatch_safe(insn, insns[0])) - return aarch64_insn_patch_text_nosync(addrs[0], insns[0]); - } - - return aarch64_insn_patch_text_sync(addrs, insns, cnt); -} - static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, u32 *maskp, int *shiftp) { From 05f2d2f83b5a02a15b6538017f29ee53a73088fb Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 6 Jun 2018 12:31:20 +0530 Subject: [PATCH 20/90] arm64: tlbflush: Introduce __flush_tlb_kernel_pgtable Add an interface to invalidate intermediate page tables from TLB for kernel. Acked-by: Will Deacon Signed-off-by: Chintan Pandya Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index dfc61d73f740..a4a1901140ee 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -218,6 +218,13 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, dsb(ish); } +static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) +{ + unsigned long addr = __TLBI_VADDR(kaddr, 0); + + __tlbi(vaae1is, addr); + dsb(ish); +} #endif #endif From ec28bb9c9b0826d7bd36f44cccfa5295c291cadd Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 6 Jun 2018 12:31:21 +0530 Subject: [PATCH 21/90] arm64: Implement page table free interfaces arm64 requires break-before-make. Originally, before setting up new pmd/pud entry for huge mapping, in few cases, the modifying pmd/pud entry was still valid and pointing to next level page table as we only clear off leaf PTE in unmap leg. a) This was resulting into stale entry in TLBs (as few TLBs also cache intermediate mapping for performance reasons) b) Also, modifying pmd/pud was the only reference to next level page table and it was getting lost without freeing it. So, page leaks were happening. Implement pud_free_pmd_page() and pmd_free_pte_page() to enforce BBM and also free the leaking page tables. 
Implementation requires, 1) Clearing off the current pud/pmd entry 2) Invalidation of TLB 3) Freeing of the un-used next level page tables Reviewed-by: Will Deacon Signed-off-by: Chintan Pandya Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 48 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8ae5d7ae4af3..65f86271f02b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -45,6 +45,7 @@ #include #include #include +#include #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -977,12 +978,51 @@ int pmd_clear_huge(pmd_t *pmdp) return 1; } -int pud_free_pmd_page(pud_t *pud, unsigned long addr) +int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) { - return pud_none(*pud); + pte_t *table; + pmd_t pmd; + + pmd = READ_ONCE(*pmdp); + + /* No-op for empty entry and WARN_ON for valid entry */ + if (!pmd_present(pmd) || !pmd_table(pmd)) { + VM_WARN_ON(!pmd_table(pmd)); + return 1; + } + + table = pte_offset_kernel(pmdp, addr); + pmd_clear(pmdp); + __flush_tlb_kernel_pgtable(addr); + pte_free_kernel(NULL, table); + return 1; } -int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +int pud_free_pmd_page(pud_t *pudp, unsigned long addr) { - return pmd_none(*pmd); + pmd_t *table; + pmd_t *pmdp; + pud_t pud; + unsigned long next, end; + + pud = READ_ONCE(*pudp); + + /* No-op for empty entry and WARN_ON for valid entry */ + if (!pud_present(pud) || !pud_table(pud)) { + VM_WARN_ON(!pud_table(pud)); + return 1; + } + + table = pmd_offset(pudp, addr); + pmdp = table; + next = addr; + end = addr + PUD_SIZE; + do { + pmd_free_pte_page(pmdp, next); + } while (pmdp++, next += PMD_SIZE, next != end); + + pud_clear(pudp); + __flush_tlb_kernel_pgtable(addr); + pmd_free(NULL, table); + return 1; } From 178909a669584b97e9154375df2126fd93b6306c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2018 09:57:45 +0100 Subject: [PATCH 22/90] arm64: errata: Don't define type field twice for arm64_errata[] entries The ERRATA_MIDR_REV_RANGE macro assigns ARM64_CPUCAP_LOCAL_CPU_ERRATUM to the '.type' field of the 'struct arm64_cpu_capabilities', so there's no need to assign it explicitly as well. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 459129712dfa..94876ab071c7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -662,7 +662,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = enable_smccc_arch_workaround_1, ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), }, @@ -671,7 +670,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif From 31b46035576d405ca9a98be95945986c91986368 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:42 +0100 Subject: [PATCH 23/90] arm64: topology: refactor reset_cpu_topology to add support for removing topology Currently reset_cpu_topology clears all the CPU topology information and resets to default values. However we may need to just clear the information when we hotplug out the CPU. 
In preparation to add the support the same, let's refactor reset_cpu_topology to just reset the information and move clearing out the topology information to clear_cpu_topology. Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f845a8617812..b64733c5ea28 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -293,6 +293,19 @@ topology_populated: update_siblings_masks(cpuid); } +static void clear_cpu_topology(int cpu) +{ + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpumask_clear(&cpu_topo->llc_siblings); + cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +} + static void __init reset_cpu_topology(void) { unsigned int cpu; @@ -303,15 +316,9 @@ static void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = 0; cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; - cpumask_clear(&cpu_topo->llc_siblings); - cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); - cpumask_clear(&cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpu_topo->core_sibling); - cpumask_clear(&cpu_topo->thread_sibling); - cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + clear_cpu_topology(cpu); } } From 97fd6016a7b3df00901b4cfdd883eac01e89fa8a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:43 +0100 Subject: [PATCH 24/90] arm64: numa: separate out updates to percpu nodeid and NUMA node cpumap Currently numa_clear_node removes both cpu information from the NUMA node cpumap as well as the NUMA node id from the cpu. Similarly numa_store_cpu_info updates both percpu nodeid and NUMA cpumap. However we need to retain the numa node id for the cpu and only remove the cpu information from the numa node cpumap during CPU hotplug out. The same can be extended for hotplugging in the CPU. This patch separates out numa_{add,remove}_cpu from numa_clear_node and numa_store_cpu_info. 
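As a rough illustration only (not part of this patch), a CPU online/offline pair built on the split helpers could look like the sketch below; the callback names are placeholders, and the real call sites are added by the SMP patches later in this series.

	/* Illustrative only: the node id stored by numa_store_cpu_info()
	 * at boot stays valid across hotplug; only the node cpumask is
	 * updated on the online/offline paths.
	 */
	static int example_cpu_online(unsigned int cpu)
	{
		numa_add_cpu(cpu);	/* set cpu in its node's cpumask */
		return 0;
	}

	static int example_cpu_offline(unsigned int cpu)
	{
		numa_remove_cpu(cpu);	/* clear cpu from its node's cpumask */
		return 0;
	}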
Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/include/asm/numa.h | 4 ++++ arch/arm64/kernel/smp.c | 2 ++ arch/arm64/mm/numa.c | 29 +++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 01bc46d5b43a..626ad01e83bf 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -35,10 +35,14 @@ void __init numa_set_distance(int from, int to, int distance); void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); void numa_store_cpu_info(unsigned int cpu); +void numa_add_cpu(unsigned int cpu); +void numa_remove_cpu(unsigned int cpu); #else /* CONFIG_NUMA */ static inline void numa_store_cpu_info(unsigned int cpu) { } +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } static inline void arm64_numa_init(void) { } static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2faa9863d2e5..2a315f32fad3 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -225,6 +225,7 @@ asmlinkage notrace void secondary_start_kernel(void) notify_cpu_starting(cpu); store_cpu_topology(cpu); + numa_add_cpu(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -679,6 +680,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) this_cpu = smp_processor_id(); store_cpu_topology(this_cpu); numa_store_cpu_info(this_cpu); + numa_add_cpu(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index dad128ba98bf..146c04ceaa51 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -70,19 +70,32 @@ EXPORT_SYMBOL(cpumask_of_node); #endif -static void map_cpu_to_node(unsigned int cpu, int nid) +static void numa_update_cpu(unsigned int cpu, bool remove) { - set_cpu_numa_node(cpu, nid); - if (nid >= 0) + int nid = cpu_to_node(cpu); + + if (nid == NUMA_NO_NODE) + return; + + if (remove) + cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]); + else cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); } +void numa_add_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, false); +} + +void numa_remove_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, true); +} + void numa_clear_node(unsigned int cpu) { - int nid = cpu_to_node(cpu); - - if (nid >= 0) - cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]); + numa_remove_cpu(cpu); set_cpu_numa_node(cpu, NUMA_NO_NODE); } @@ -116,7 +129,7 @@ static void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(unsigned int cpu) { - map_cpu_to_node(cpu, cpu_to_node_map[cpu]); + set_cpu_numa_node(cpu, cpu_to_node_map[cpu]); } void __init early_map_cpu_to_node(unsigned int cpu, int nid) From 5bdd2b3f0f8830f281bb568e65df6eabf655dd0d Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:44 +0100 Subject: [PATCH 25/90] arm64: topology: add support to remove cpu topology sibling masks This patch adds support to remove all the CPU topology information using clear_cpu_topology and also resetting the sibling information on other sibling CPUs. This will be used in cpu_disable so that all the topology sibling information is removed on CPU hotplug out. 
Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/include/asm/topology.h | 2 ++ arch/arm64/kernel/topology.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index df48212f767b..4c073d28f9e4 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -20,9 +20,11 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_siblings) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); +void remove_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef CONFIG_NUMA diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b64733c5ea28..cb81df545359 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -322,6 +322,20 @@ static void __init reset_cpu_topology(void) } } +void remove_cpu_topology(unsigned int cpu) +{ + int sibling; + + for_each_cpu(sibling, topology_core_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_llc_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); + + clear_cpu_topology(cpu); +} + #ifdef CONFIG_ACPI /* * Propagate the topology information of the processor_topology_node tree to the From 5ec8b59172b4e8d26a12952ce3a2914b30128538 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:45 +0100 Subject: [PATCH 26/90] arm64: topology: restrict updating siblings_masks to online cpus only It's incorrect to iterate over all the possible CPUs to update the sibling masks when any CPU is hotplugged in. In case the topology siblings masks of the CPU is removed when is it hotplugged out, we end up updating those masks when one of it's sibling is powered up again. This will provide inconsistent view. Further, since the CPU calling update_sibling_masks is yet to be set online, there's no need to compare itself with each online CPU when updating the siblings masks. This patch restricts updation of sibling masks only for CPUs that are already online. It also the drops the unnecessary cpuid check. 
Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index cb81df545359..1a3f8ab455d0 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -231,7 +231,7 @@ static void update_siblings_masks(unsigned int cpuid) int cpu; /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; if (cpuid_topo->llc_id == cpu_topo->llc_id) { @@ -243,15 +243,13 @@ static void update_siblings_masks(unsigned int cpuid) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); if (cpuid_topo->core_id != cpu_topo->core_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); } } From 7f9545aa1a91a9a4dc8c3e1476dbbfa98dd38b81 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:46 +0100 Subject: [PATCH 27/90] arm64: smp: remove cpu and numa topology information when hotplugging out CPU We already repopulate the information on CPU hotplug-in, so we can safely remove the CPU topology and NUMA cpumap information during CPU hotplug out operation. This will help to provide the correct cpumask for scheduler domains. Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2a315f32fad3..a44cc09059b5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -279,6 +279,9 @@ int __cpu_disable(void) if (ret) return ret; + remove_cpu_topology(cpu); + numa_remove_cpu(cpu); + /* * Take this CPU offline. Once we clear this, we can't return, * and we must not schedule until we're ready to give up the cpu. From f70ff1271378b12570aa3c4ebe2ad07f35a4ec51 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:47 +0100 Subject: [PATCH 28/90] arm64: topology: rename llc_siblings to align with other struct members Similar to core_sibling and thread_sibling, it's better to align and rename llc_siblings to llc_sibling. 
Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/include/asm/topology.h | 4 ++-- arch/arm64/kernel/topology.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 4c073d28f9e4..49a0fee4f89b 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -11,7 +11,7 @@ struct cpu_topology { int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; - cpumask_t llc_siblings; + cpumask_t llc_sibling; }; extern struct cpu_topology cpu_topology[NR_CPUS]; @@ -20,7 +20,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) -#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_siblings) +#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 1a3f8ab455d0..03b0ed2480aa 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -218,8 +218,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling; if (cpu_topology[cpu].llc_id != -1) { - if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask)) - core_mask = &cpu_topology[cpu].llc_siblings; + if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) + core_mask = &cpu_topology[cpu].llc_sibling; } return core_mask; @@ -235,8 +235,8 @@ static void update_siblings_masks(unsigned int cpuid) cpu_topo = &cpu_topology[cpu]; if (cpuid_topo->llc_id == cpu_topo->llc_id) { - cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings); - cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings); + cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } if (cpuid_topo->package_id != cpu_topo->package_id) @@ -295,8 +295,8 @@ static void clear_cpu_topology(int cpu) { struct cpu_topology *cpu_topo = &cpu_topology[cpu]; - cpumask_clear(&cpu_topo->llc_siblings); - cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); cpumask_clear(&cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpu_topo->core_sibling); From e67ecf647020d31342d37e73c3766ae02434ed24 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Jul 2018 12:02:48 +0100 Subject: [PATCH 29/90] arm64: topology: re-introduce numa mask check for scheduler MC selection Commit 37c3ec2d810f ("arm64: topology: divorce MC scheduling domain from core_siblings") selected the smallest of LLC, socket siblings, and NUMA node siblings to ensure that the sched domain we build for the MC layer isn't larger than the DIE above it or it's shrunk to the socket or NUMA node if LLC exist acrosis NUMA node/chiplets. Commit acd32e52e4e0 ("arm64: topology: Avoid checking numa mask for scheduler MC selection") reverted the NUMA siblings checks since the CPU topology masks weren't updated on hotplug at that time. This patch re-introduces numa mask check as the CPU and NUMA topology is now updated in hotplug paths. Effectively, this patch does the partial revert of commit acd32e52e4e0. 
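To make the selection order concrete, consider a made-up topology; the numbers below are purely illustrative and not taken from any system mentioned above.

	/*
	 * Hypothetical system: one 16-core package split into two NUMA
	 * nodes of 8 cores each, with an LLC shared per 4-core cluster:
	 *
	 *   core_sibling (package)  : 16 CPUs
	 *   cpumask_of_node(cpu)    :  8 CPUs
	 *   llc_sibling             :  4 CPUs
	 *
	 * cpu_coregroup_mask() starts from the NUMA node mask. The package
	 * mask is not a subset of it, so the node mask is kept; the LLC
	 * mask is a subset, so the MC domain is finally shrunk to the
	 * 4-CPU cluster, i.e. the smallest of the three masks.
	 */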
Cc: Catalin Marinas Cc: Will Deacon Tested-by: Ganapatrao Kulkarni Tested-by: Hanjun Guo Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 03b0ed2480aa..0825c4a856e3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -215,8 +215,13 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling; + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } if (cpu_topology[cpu].llc_id != -1) { if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) core_mask = &cpu_topology[cpu].llc_sibling; From bedbeec65c6cdec25aab609d557b630c97f81866 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2018 16:21:17 +0100 Subject: [PATCH 30/90] arm64: mm: Export __flush_icache_range() to modules lkdtm calls flush_icache_range(), which results in an out-of-line call to __flush_icache_range(), which is not exported to modules. Export the symbol to modules to fix this build breakage. Fixes: 3b8c9f1cdfc5 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings") Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 1059884f9a6f..9786f9d5d3dc 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ -EXPORT_SYMBOL(flush_icache_range); +EXPORT_SYMBOL(__flush_icache_range); #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size) From d7c7118caf99fc4fe3b2e1f47e3d23b2bbaf7929 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 5 Jul 2018 17:18:05 +0100 Subject: [PATCH 31/90] MAINTAINERS: arm64: Remove boot/dts/ directory from arm64 entry The arm-soc tree does a good job handling .dts files, so exclude them from the ARM64 entry in MAINTAINERS. Cc: Catalin Marinas Acked-by: Olof Johansson Signed-off-by: Will Deacon --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 07d1576fc766..61301440e66f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2264,6 +2264,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git S: Maintained F: arch/arm64/ +X: arch/arm64/boot/dts/ F: Documentation/arm64/ AS3645A LED FLASH CONTROLLER DRIVER From e7d4bac428edb806c5f5d903ab8850ffbe0ed9ce Mon Sep 17 00:00:00 2001 From: Nikunj Kela Date: Fri, 6 Jul 2018 10:47:24 -0700 Subject: [PATCH 32/90] arm64: add ARM64-specific support for flatmem Flatmem is useful in reducing kernel memory usage. One usecase is in kdump kernel. We are able to save ~14M by moving to flatmem scheme. 
Cc: xe-kernel@external.cisco.com Cc: Nikunj Kela Signed-off-by: Nikunj Kela Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 476de9b1d239..c548b3dd48ab 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -786,6 +786,9 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE +config ARCH_FLATMEM_ENABLE + def_bool y + config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM From 64b2f025715a68bed49fb14588c2d893dfbd00a8 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 7 Jul 2018 23:16:56 +0200 Subject: [PATCH 33/90] arm: perf: prevent unbind/bind via sysfs Unbinding and rebinding the ARM PMU driver via sysfs leads to a warning followed by more errors: WARNING: CPU: 0 PID: 217 at kernel/irq/chip.c:1034 irq_modify_status+0x150/0x16c .. genirq: Flags mismatch irq 19. 00010c04 (arm-pmu) vs. 00010c04 (arm-pmu) hw perfevents: unable to request IRQ19 for ARM PMU counters hw perfevents: /pmu: failed to register PMU devices! armv7-pmu: probe of pmu failed with error -16 The driver is clearly not designed to be removed. Disable bind/ unbind for this driver. Signed-off-by: Stefan Agner Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v7.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 57f01e059f39..5a5116794440 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -2030,6 +2030,7 @@ static struct platform_driver armv7_pmu_driver = { .driver = { .name = "armv7-pmu", .of_match_table = armv7_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv7_pmu_device_probe, }; From e189624916961c735c18e3c75acc478661403830 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 25 Jun 2018 14:05:52 +0100 Subject: [PATCH 34/90] arm64: numa: rework ACPI NUMA initialization Current ACPI ARM64 NUMA initialization code in acpi_numa_gicc_affinity_init() carries out NUMA nodes creation and cpu<->node mappings at the same time in the arch backend so that a single SRAT walk is needed to parse both pieces of information. This implies that the cpu<->node mappings must be stashed in an array (sized NR_CPUS) so that SMP code can later use the stashed values to avoid another SRAT table walk to set-up the early cpu<->node mappings. If the kernel is configured with a NR_CPUS value less than the actual processor entries in the SRAT (and MADT), the logic in acpi_numa_gicc_affinity_init() is broken in that the cpu<->node mapping is only carried out (and stashed for future use) only for a number of SRAT entries up to NR_CPUS, which do not necessarily correspond to the possible cpus detected at SMP initialization in acpi_map_gic_cpu_interface() (ie MADT and SRAT processor entries order is not enforced), which leaves the kernel with broken cpu<->node mappings. Furthermore, given the current ACPI NUMA code parsing logic in acpi_numa_gicc_affinity_init(), PXM domains for CPUs that are not parsed because they exceed NR_CPUS entries are not mapped to NUMA nodes (ie the PXM corresponding node is not created in the kernel) leaving the system with a broken NUMA topology. Rework the ACPI ARM64 NUMA initialization process so that the NUMA nodes creation and cpu<->node mappings are decoupled. 
cpu<->node mappings are moved to SMP initialization code (where they are needed), at the cost of an extra SRAT walk so that ACPI NUMA mappings can be batched before being applied, fixing current parsing pitfalls. Acked-by: Hanjun Guo Tested-by: John Garry Fixes: d8b47fca8c23 ("arm64, ACPI, NUMA: NUMA support based on SRAT and SLIT") Link: http://lkml.kernel.org/r/1527768879-88161-2-git-send-email-xiexiuqi@huawei.com Reported-by: Xie XiuQi Signed-off-by: Lorenzo Pieralisi Cc: Punit Agrawal Cc: Jonathan Cameron Cc: Will Deacon Cc: Hanjun Guo Cc: Ganapatrao Kulkarni Cc: Jeremy Linton Cc: Catalin Marinas Cc: Xie XiuQi Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 6 ++- arch/arm64/kernel/acpi_numa.c | 92 +++++++++++++++++++++-------------- arch/arm64/kernel/smp.c | 39 ++++++++++----- 3 files changed, 87 insertions(+), 50 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 0db62a4cbce2..d1c290f29b6f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -134,10 +134,12 @@ pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); #ifdef CONFIG_ACPI_NUMA int arm64_acpi_numa_init(void); -int acpi_numa_get_nid(unsigned int cpu, u64 hwid); +int acpi_numa_get_nid(unsigned int cpu); +void acpi_map_cpus_to_nodes(void); #else static inline int arm64_acpi_numa_init(void) { return -ENOSYS; } -static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; } +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +static inline void acpi_map_cpus_to_nodes(void) { } #endif /* CONFIG_ACPI_NUMA */ #define ACPI_TABLE_UPGRADE_MAX_PHYS MEMBLOCK_ALLOC_ACCESSIBLE diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index d190a7b231bf..4f4f1815e047 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -26,36 +26,73 @@ #include #include -#include #include -static int cpus_in_srat; +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; -struct __node_cpu_hwid { - u32 node_id; /* logical node containing this CPU */ - u64 cpu_hwid; /* MPIDR for this CPU */ -}; - -static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { -[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; - -int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +int __init acpi_numa_get_nid(unsigned int cpu) { - int i; + return acpi_early_node_map[cpu]; +} - for (i = 0; i < cpus_in_srat; i++) { - if (hwid == early_node_cpu_hwid[i].cpu_hwid) - return early_node_cpu_hwid[i].node_id; - } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; - return NUMA_NO_NODE; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_srat_gicc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_gicc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. 
+ */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> MPIDR 0x%llx -> Node %d\n", pxm, + cpu_logical_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GICC_AFFINITY, + acpi_parse_gicc_pxm, 0); } /* Callback for Proximity Domain -> ACPI processor UID mapping */ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { int pxm, node; - phys_cpuid_t mpidr; if (srat_disabled()) return; @@ -70,12 +107,6 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) return; - if (cpus_in_srat >= NR_CPUS) { - pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", - NR_CPUS); - return; - } - pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); @@ -85,20 +116,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) return; } - mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); - if (mpidr == PHYS_CPUID_INVALID) { - pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", - pxm, pa->acpi_processor_uid); - bad_srat(); - return; - } - - early_node_cpu_hwid[cpus_in_srat].node_id = node; - early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; node_set(node, numa_nodes_parsed); - cpus_in_srat++; - pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", - pxm, mpidr, node); } int __init arm64_acpi_numa_init(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a44cc09059b5..25fcd22a4bb2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -522,7 +522,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) } bootcpu_valid = true; cpu_madt_gicc[0] = *processor; - early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } @@ -545,8 +544,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid)); - cpu_count++; } @@ -566,8 +563,34 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, return 0; } + +static void __init acpi_parse_and_init_cpus(void) +{ + int i; + + /* + * do a walk of MADT to determine how many CPUs + * we have including disabled CPUs, and get information + * we need for SMP init. + */ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + acpi_parse_gic_cpu_interface, 0); + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_map_cpus_to_nodes(); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} #else -#define acpi_table_parse_madt(...) do { } while (0) +#define acpi_parse_and_init_cpus(...) 
do { } while (0) #endif /* @@ -640,13 +663,7 @@ void __init smp_init_cpus(void) if (acpi_disabled) of_parse_and_init_cpus(); else - /* - * do a walk of MADT to determine how many CPUs - * we have including disabled CPUs, and get information - * we need for SMP init - */ - acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, - acpi_parse_gic_cpu_interface, 0); + acpi_parse_and_init_cpus(); if (cpu_count > nr_cpu_ids) pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n", From 8d3e994241e6bcc7ead2b918c4f15b7683afa90a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:57:58 +0100 Subject: [PATCH 35/90] arm_pmu: Clean up maximum period handling Each PMU defines their max_period of the counter as the maximum value that can be counted. Since all the PMU backends support 32bit counters by default, let us remove the redundant field. No functional changes. Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 2 -- arch/arm/kernel/perf_event_v7.c | 1 - arch/arm/kernel/perf_event_xscale.c | 2 -- arch/arm64/kernel/perf_event.c | 1 - drivers/perf/arm_pmu.c | 16 ++++++++++++---- include/linux/perf/arm_pmu.h | 1 - 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index be42c4f66a40..f64a6bfebcec 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -495,7 +495,6 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; } static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) @@ -546,7 +545,6 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 5a5116794440..2cf1ca2925c8 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1170,7 +1170,6 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; - cpu_pmu->max_period = (1LLU << 32) - 1; }; static void armv7_read_num_pmnc_events(void *info) diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 88d1a76f5367..c4f029458b52 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -374,7 +374,6 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } @@ -743,7 +742,6 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 5; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 33147aacdafd..678ecffd3724 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -960,7 +960,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, 
cpu_pmu->set_event_filter = armv8pmu_set_event_filter; return 0; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a6347d487635..6ddc00da5373 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -28,6 +28,11 @@ static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static inline u64 arm_pmu_max_period(void) +{ + return (1ULL << 32) - 1; +} + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -114,8 +119,10 @@ int armpmu_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; + u64 max_period; int ret = 0; + max_period = arm_pmu_max_period(); if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); @@ -136,8 +143,8 @@ int armpmu_event_set_period(struct perf_event *event) * effect we are reducing max_period to account for * interrupt latency (and we are being very conservative). */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; + if (left > (max_period >> 1)) + left = (max_period >> 1); local64_set(&hwc->prev_count, (u64)-left); @@ -153,6 +160,7 @@ u64 armpmu_event_update(struct perf_event *event) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; + u64 max_period = arm_pmu_max_period(); again: prev_raw_count = local64_read(&hwc->prev_count); @@ -162,7 +170,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + delta = (new_raw_count - prev_raw_count) & max_period; local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); @@ -402,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event) * is far less likely to overtake the previous one unless * you have some serious IRQ latency issues. */ - hwc->sample_period = armpmu->max_period >> 1; + hwc->sample_period = arm_pmu_max_period() >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ad5444491975..12c30a22fc8d 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -94,7 +94,6 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); int num_events; - u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); From 3a95200d3f89afd8b67f39d88d36cc7ec96ce385 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:57:59 +0100 Subject: [PATCH 36/90] arm_pmu: Change API to support 64bit counter values Convert the {read/write}_counter APIs to handle 64bit values to enable supporting chained event counters. The backends still use 32bit values and we pass them 32bit values only. So in effect there are no functional changes. 
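As a hedged sketch (not taken from any of the drivers above), a purely 32-bit backend after this conversion simply zero-extends what it reads and truncates what it writes; example_hw_read()/example_hw_write() are made-up helpers standing in for the real register accessors.

	static u64 example_pmu_read_counter(struct perf_event *event)
	{
		u32 value = example_hw_read(event->hw.idx);

		/* zero-extend: the core masks deltas with the 32-bit period */
		return value;
	}

	static void example_pmu_write_counter(struct perf_event *event, u64 value)
	{
		/* the backend still programs only the low 32 bits */
		example_hw_write(event->hw.idx, (u32)value);
	}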
Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 4 ++-- arch/arm/kernel/perf_event_v7.c | 4 ++-- arch/arm/kernel/perf_event_xscale.c | 8 ++++---- arch/arm64/kernel/perf_event.c | 9 ++++----- include/linux/perf/arm_pmu.h | 4 ++-- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index f64a6bfebcec..0729f9841ef4 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -233,7 +233,7 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 armv6pmu_read_counter(struct perf_event *event) +static inline u64 armv6pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -251,7 +251,7 @@ static inline u32 armv6pmu_read_counter(struct perf_event *event) return value; } -static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 2cf1ca2925c8..973043dd6187 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -743,7 +743,7 @@ static inline void armv7_pmnc_select_counter(int idx) isb(); } -static inline u32 armv7pmu_read_counter(struct perf_event *event) +static inline u64 armv7pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -763,7 +763,7 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) return value; } -static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index c4f029458b52..942230fe0426 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -316,7 +316,7 @@ static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale1pmu_read_counter(struct perf_event *event) +static inline u64 xscale1pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -337,7 +337,7 @@ static inline u32 xscale1pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -678,7 +678,7 @@ static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale2pmu_read_counter(struct perf_event *event) +static inline u64 xscale2pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -705,7 +705,7 @@ static inline u32 xscale2pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event 
*hwc = &event->hw; int counter = hwc->idx; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 678ecffd3724..66a2ffdca6dd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -512,7 +512,7 @@ static inline int armv8pmu_select_counter(int idx) return idx; } -static inline u32 armv8pmu_read_counter(struct perf_event *event) +static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -530,7 +530,7 @@ static inline u32 armv8pmu_read_counter(struct perf_event *event) return value; } -static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -545,9 +545,8 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) * count using the lower 32bits and we want an interrupt when * it overflows. */ - u64 value64 = 0xffffffff00000000ULL | value; - - write_sysreg(value64, pmccntr_el0); + value |= 0xffffffff00000000ULL; + write_sysreg(value, pmccntr_el0); } else if (armv8pmu_select_counter(idx) == idx) write_sysreg(value, pmxevcntr_el0); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 12c30a22fc8d..f7126a21df30 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -87,8 +87,8 @@ struct arm_pmu { struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(struct perf_event *event); - void (*write_counter)(struct perf_event *event, u32 val); + u64 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u64 val); void (*start)(struct arm_pmu *); void (*stop)(struct arm_pmu *); void (*reset)(void *); From e2da97d328d4951d25f6634eda7213f7257417b6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:00 +0100 Subject: [PATCH 37/90] arm_pmu: Add support for 64bit event counters Each PMU has a set of 32bit event counters. But in some special cases, the events could be counted using counters which are effectively 64bit wide. e.g, Arm V8 PMUv3 has a 64 bit cycle counter which can count only the CPU cycles. Also, the PMU can chain the event counters to effectively count as a 64bit counter. Add support for tracking the events that uses 64bit counters. This only affects the periods set for each counter in the core driver. 
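For illustration only, a backend would opt an event into the 64-bit behaviour from its map_event() hook along these lines; the armv8 driver does this in a later patch via a "long" config1 bit, and the example_* helper names here are invented.

	static int example_pmu_map_event(struct perf_event *event)
	{
		/* request a 64-bit period: core uses GENMASK_ULL(63, 0) */
		if (example_event_wants_64bit(event))
			event->hw.flags |= ARMPMU_EVT_64BIT;

		return example_map_hw_event(event);
	}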
Cc: Will Deacon Reviewed-by: Julien Thierry Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 16 ++++++++++------ include/linux/perf/arm_pmu.h | 6 ++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6ddc00da5373..8cad6b535a2c 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -28,9 +28,12 @@ static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); -static inline u64 arm_pmu_max_period(void) +static inline u64 arm_pmu_event_max_period(struct perf_event *event) { - return (1ULL << 32) - 1; + if (event->hw.flags & ARMPMU_EVT_64BIT) + return GENMASK_ULL(63, 0); + else + return GENMASK_ULL(31, 0); } static int @@ -122,7 +125,7 @@ int armpmu_event_set_period(struct perf_event *event) u64 max_period; int ret = 0; - max_period = arm_pmu_max_period(); + max_period = arm_pmu_event_max_period(event); if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); @@ -148,7 +151,7 @@ int armpmu_event_set_period(struct perf_event *event) local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(event, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & max_period); perf_event_update_userpage(event); @@ -160,7 +163,7 @@ u64 armpmu_event_update(struct perf_event *event) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; - u64 max_period = arm_pmu_max_period(); + u64 max_period = arm_pmu_event_max_period(event); again: prev_raw_count = local64_read(&hwc->prev_count); @@ -368,6 +371,7 @@ __hw_perf_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int mapping; + hwc->flags = 0; mapping = armpmu->map_event(event); if (mapping < 0) { @@ -410,7 +414,7 @@ __hw_perf_event_init(struct perf_event *event) * is far less likely to overtake the previous one unless * you have some serious IRQ latency issues. */ - hwc->sample_period = arm_pmu_max_period() >> 1; + hwc->sample_period = arm_pmu_event_max_period(event) >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index f7126a21df30..10f92e1d8e7b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -25,6 +25,12 @@ */ #define ARMPMU_MAX_HWEVENTS 32 +/* + * ARM PMU hw_event flags + */ +/* Event uses a 64bit counter */ +#define ARMPMU_EVT_64BIT 1 + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF From 7dfc8db1d117ae08c649266d5459ac8b10800d6e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:01 +0100 Subject: [PATCH 38/90] arm_pmu: Tidy up clear_event_idx call backs The armpmu uses get_event_idx callback to allocate an event counter for a given event, which marks the selected counter as "used". Now, when we delete the counter, the arm_pmu goes ahead and clears the "used" bit and then invokes the "clear_event_idx" call back, which kind of splits the job between the core code and the backend. To keep things tidy, mandate the implementation of clear_event_idx() and add it for exisiting backends. This will be useful for adding the chained event support, where we leave the event idx maintenance to the backend. 
Also, when an event is removed from the PMU, reset the hw.idx to indicate that a counter is not allocated for this event, to help the backends do better checks. This will be also used for the chain counter support. Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 8 ++++++++ arch/arm/kernel/perf_event_v7.c | 9 +++++++++ arch/arm/kernel/perf_event_xscale.c | 8 ++++++++ arch/arm64/kernel/perf_event.c | 7 +++++++ drivers/perf/arm_pmu.c | 7 +++---- 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 0729f9841ef4..1ae99deeec54 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -411,6 +411,12 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; @@ -491,6 +497,7 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; @@ -541,6 +548,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 973043dd6187..a4fb0f8b8f84 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1058,6 +1058,12 @@ static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs. 
*/ @@ -1167,6 +1173,7 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv7pmu_read_counter; cpu_pmu->write_counter = armv7pmu_write_counter; cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; @@ -1637,6 +1644,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool krait_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -1966,6 +1974,7 @@ static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool scorpion_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || scorpion_event) { bit = scorpion_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 942230fe0426..f6cdcacfb96d 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -292,6 +292,12 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; @@ -370,6 +376,7 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale1pmu_read_counter; cpu_pmu->write_counter = xscale1pmu_write_counter; cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale1pmu_start; cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; @@ -738,6 +745,7 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale2pmu_read_counter; cpu_pmu->write_counter = xscale2pmu_write_counter; cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale2pmu_start; cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 66a2ffdca6dd..ac66851d4b13 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -778,6 +778,12 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs. 
*/ @@ -956,6 +962,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv8pmu_read_counter, cpu_pmu->write_counter = armv8pmu_write_counter, cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx, cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 8cad6b535a2c..a28881058f18 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -238,11 +238,10 @@ armpmu_del(struct perf_event *event, int flags) armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - if (armpmu->clear_event_idx) - armpmu->clear_event_idx(hw_events, event); - + armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); + /* Clear the allocated counter */ + hwc->idx = -1; } static int From 0c55d19c1659d3fc38dcecca8bdaf6eabda39a9d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:02 +0100 Subject: [PATCH 39/90] arm64: perf: Clean up armv8pmu_select_counter armv8pmu_select_counter always returns the passed idx. So let us make that void and get rid of the pointless checks. Suggested-by: Mark Rutland Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index ac66851d4b13..bc014af59fb3 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -503,13 +503,17 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline int armv8pmu_select_counter(int idx) +static inline void armv8pmu_select_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_sysreg(counter, pmselr_el0); isb(); +} - return idx; +static inline u32 armv8pmu_read_evcntr(int idx) +{ + armv8pmu_select_counter(idx); + return read_sysreg(pmxevcntr_el0); } static inline u64 armv8pmu_read_counter(struct perf_event *event) @@ -524,12 +528,18 @@ static inline u64 armv8pmu_read_counter(struct perf_event *event) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); - else if (armv8pmu_select_counter(idx) == idx) - value = read_sysreg(pmxevcntr_el0); + else + value = armv8pmu_read_evcntr(idx); return value; } +static inline void armv8pmu_write_evcntr(int idx, u32 value) +{ + armv8pmu_select_counter(idx); + write_sysreg(value, pmxevcntr_el0); +} + static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -547,16 +557,15 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) */ value |= 0xffffffff00000000ULL; write_sysreg(value, pmccntr_el0); - } else if (armv8pmu_select_counter(idx) == idx) - write_sysreg(value, pmxevcntr_el0); + } else + armv8pmu_write_evcntr(idx, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) { - if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); - } + armv8pmu_select_counter(idx); + val &= ARMV8_PMU_EVTYPE_MASK; + write_sysreg(val, pmxevtyper_el0); } static inline int armv8pmu_enable_counter(int idx) From 3cce50dfec4a5b0414c974190940f47dd32c6dee Mon Sep 17 00:00:00 2001 From: Suzuki K 
Poulose Date: Tue, 10 Jul 2018 09:58:03 +0100 Subject: [PATCH 40/90] arm64: perf: Disable PMU while processing counter overflows The arm64 PMU updates the event counters and reprograms the counters in the overflow IRQ handler without disabling the PMU. This could potentially cause skews in for group counters, where the overflowed counters may potentially loose some event counts, while they are reprogrammed. To prevent this, disable the PMU while we process the counter overflows and enable it right back when we are done. This patch also moves the PMU stop/start routines to avoid a forward declaration. Suggested-by: Mark Rutland Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 50 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index bc014af59fb3..b414d81b80ca 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -678,6 +678,28 @@ static void armv8pmu_disable_event(struct perf_event *event) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; @@ -702,6 +724,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) */ regs = get_irq_regs(); + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -726,6 +753,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } + armv8pmu_start(cpu_pmu); /* * Handle the pending perf events. 
@@ -739,28 +767,6 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) return IRQ_HANDLED; } -static void armv8pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { From c13207905340d85eaddd85b6d2868218f324b180 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:04 +0100 Subject: [PATCH 41/90] arm64: perf: Add support for chaining event counters Add support for 64bit event by using chained event counters and 64bit cycle counters. PMUv3 allows chaining a pair of adjacent 32-bit counters, effectively forming a 64-bit counter. The low/even counter is programmed to count the event of interest, and the high/odd counter is programmed to count the CHAIN event, taken when the low/even counter overflows. For CPU cycles, when 64bit mode is requested, the cycle counter is used in 64bit mode. If the cycle counter is not available, falls back to chaining. Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 185 ++++++++++++++++++++++++++++----- drivers/perf/arm_pmu.c | 9 +- 2 files changed, 160 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b414d81b80ca..dfff5ed5c625 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -446,9 +446,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, + &format_attr_long.attr, NULL, }; @@ -465,6 +472,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +/* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event). This must be called only when the event has + * a counter allocated. 
+ */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + + return !WARN_ON(idx < 0) && + armv8pmu_event_is_64bit(event) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + /* * ARMv8 low level PMU access */ @@ -516,12 +538,23 @@ static inline u32 armv8pmu_read_evcntr(int idx) return read_sysreg(pmxevcntr_el0); } +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = 0; + + val = armv8pmu_read_evcntr(idx); + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; +} + static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u32 value = 0; + u64 value = 0; if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", @@ -529,7 +562,7 @@ static inline u64 armv8pmu_read_counter(struct perf_event *event) else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else - value = armv8pmu_read_evcntr(idx); + value = armv8pmu_read_hw_counter(event); return value; } @@ -540,6 +573,19 @@ static inline void armv8pmu_write_evcntr(int idx, u32 value) write_sysreg(value, pmxevcntr_el0); } +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -551,14 +597,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* - * Set the upper 32bits as this is a 64bit counter but we only - * count using the lower 32bits and we want an interrupt when - * it overflows. + * The cycles counter is really a 64-bit counter. + * When treating it as a 32-bit counter, we only count + * the lower 32 bits, and set the upper 32-bits so that + * we get an interrupt upon 32-bit overflow. */ - value |= 0xffffffff00000000ULL; + if (!armv8pmu_event_is_64bit(event)) + value |= 0xffffffff00000000ULL; write_sysreg(value, pmccntr_el0); } else - armv8pmu_write_evcntr(idx, value); + armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) @@ -568,6 +616,27 @@ static inline void armv8pmu_write_evtype(int idx, u32 val) write_sysreg(val, pmxevtyper_el0); } +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. 
+ */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + armv8pmu_write_evtype(idx, hwc->config_base); + } +} + static inline int armv8pmu_enable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -575,6 +644,16 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + + armv8pmu_enable_counter(idx); + if (armv8pmu_event_is_chained(event)) + armv8pmu_enable_counter(idx - 1); + isb(); +} + static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -582,6 +661,16 @@ static inline int armv8pmu_disable_counter(int idx) return idx; } +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_chained(event)) + armv8pmu_disable_counter(idx - 1); + armv8pmu_disable_counter(idx); +} + static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -589,6 +678,11 @@ static inline int armv8pmu_enable_intens(int idx) return idx; } +static inline int armv8pmu_enable_event_irq(struct perf_event *event) +{ + return armv8pmu_enable_intens(event->hw.idx); +} + static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -601,6 +695,11 @@ static inline int armv8pmu_disable_intens(int idx) return idx; } +static inline int armv8pmu_disable_event_irq(struct perf_event *event) +{ + return armv8pmu_disable_intens(event->hw.idx); +} + static inline u32 armv8pmu_getreset_flags(void) { u32 value; @@ -618,10 +717,8 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -632,22 +729,22 @@ static void armv8pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Set event (if destined for PMNx counters). 
*/ - armv8pmu_write_evtype(idx, hwc->config_base); + armv8pmu_write_event_type(event); /* * Enable interrupt for this counter */ - armv8pmu_enable_intens(idx); + armv8pmu_enable_event_irq(event); /* * Enable counter */ - armv8pmu_enable_counter(idx); + armv8pmu_enable_event_counter(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -655,10 +752,8 @@ static void armv8pmu_enable_event(struct perf_event *event) static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Disable counter and interrupt @@ -668,12 +763,12 @@ static void armv8pmu_disable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Disable interrupt for this counter */ - armv8pmu_disable_intens(idx); + armv8pmu_disable_event_irq(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -767,10 +862,42 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) return IRQ_HANDLED; } +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; +} + +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; +} + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { - int idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -784,19 +911,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. 
*/ - return -EAGAIN; + if (armv8pmu_event_is_64bit(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); } static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) + struct perf_event *event) { - clear_bit(event->hw.idx, cpuc->used_mask); + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); } /* @@ -871,6 +999,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + /* Onl expose micro/arch events supported by this PMU */ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a28881058f18..7f01f6f60b87 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -665,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) int idx; for (idx = 0; idx < armpmu->num_events; idx++) { - /* - * If the counter is not used skip it, there is no - * need of stopping/restarting it. - */ - if (!test_bit(idx, hw_events->used_mask)) - continue; - event = hw_events->events[idx]; + if (!event) + continue; switch (cmd) { case CPU_PM_ENTER: From 54501ac150fed82b256e6a9bd4e2201bc3b21b22 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jul 2018 17:16:27 +0200 Subject: [PATCH 42/90] arm64: make flatmem depend on !NUMA Building without NUMA but with FLATMEM results in a link error because mem_map[] is not available: aarch64-linux-ld -EB -maarch64elfb --no-undefined -X -pie -shared -Bsymbolic --no-apply-dynamic-relocs --build-id -o .tmp_vmlinux1 -T ./arch/arm64/kernel/vmlinux.lds --whole-archive built-in.a --no-whole-archive --start-group arch/arm64/lib/lib.a lib/lib.a --end-group init/do_mounts.o: In function `mount_block_root': do_mounts.c:(.init.text+0x1e8): undefined reference to `mem_map' arch/arm64/kernel/vdso.o: In function `vdso_init': vdso.c:(.init.text+0xb4): undefined reference to `mem_map' This uses the same trick as the other architectures, making flatmem depend on !NUMA to avoid the broken configuration. Fixes: e7d4bac428ed ("arm64: add ARM64-specific support for flatmem") Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c548b3dd48ab..2e7609e4d08a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -787,7 +787,7 @@ config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE config ARCH_FLATMEM_ENABLE - def_bool y + def_bool !NUMA config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM From 409d5db49867c20d226a1fc50a73d646bd733dae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jun 2018 14:46:50 +0100 Subject: [PATCH 43/90] arm64: rseq: Implement backend rseq calls and select HAVE_RSEQ Implement calls to rseq_signal_deliver, rseq_handle_notify_resume and rseq_syscall so that we can select HAVE_RSEQ on arm64. 
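For completeness, a userspace thread exercises the newly wired-up interface roughly as in the sketch below. This is illustrative only and not part of this series: the signature constant is a placeholder chosen by the caller, and the __NR_rseq fallback uses the asm-generic number added later in this series.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/rseq.h>		/* struct rseq */

#ifndef __NR_rseq
#define __NR_rseq	293	/* asm-generic value, wired up later in this series */
#endif

#define MY_RSEQ_SIG	0x53053053	/* placeholder signature chosen by the caller */

static __thread struct rseq rseq_area;

static int register_rseq(void)
{
	/*
	 * Register rseq_area for the calling thread. The kernel keeps
	 * rseq_area.cpu_id up to date and fixes up interrupted rseq
	 * critical sections via the signal-delivery and notify-resume
	 * hooks added by this patch.
	 */
	if (syscall(__NR_rseq, &rseq_area, sizeof(rseq_area), 0, MY_RSEQ_SIG)) {
		perror("rseq");
		return -1;
	}
	return 0;
}
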
Acked-by: Mathieu Desnoyers Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/arm64/kernel/entry.S | 2 ++ arch/arm64/kernel/ptrace.c | 2 ++ arch/arm64/kernel/signal.c | 3 +++ 6 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2e7609e4d08a..f4157d4a0289 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -138,6 +138,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index a0baa9af5487..e0d0f5b856e7 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -43,7 +43,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 398 +#define __NR_compat_syscalls 399 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index ef292160748c..0fdc7ef8a776 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -817,6 +817,8 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 397 __SYSCALL(__NR_statx, sys_statx) +#define __NR_rseq 398 +__SYSCALL(__NR_rseq, sys_rseq) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 28ad8799406f..1eda9e1a1f4a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -904,6 +904,7 @@ ENDPROC(el0_error) ret_fast_syscall: disable_daif str x0, [sp, #S_X0] // returned x0 +#ifndef CONFIG_DEBUG_RSEQ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace @@ -911,6 +912,7 @@ ret_fast_syscall: cbnz x2, work_pending enable_step_tsk x1, x2 kernel_exit 0 +#endif ret_fast_syscall_trace: enable_daif b __sys_trace_return_skipped // we already saved x0 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 42ff28aa5a00..489db3624606 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1658,6 +1658,8 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + + rseq_syscall(regs); } /* diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 511af13e8d8f..e3b1d1b0aee8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -802,6 +802,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) int usig = ksig->sig; int ret; + rseq_signal_deliver(ksig, regs); + /* * Set up the stack frame */ @@ -940,6 +942,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (thread_flags & _TIF_FOREIGN_FPSTATE) From db7a2d1809a5b6b08d138ff68837f805fc073351 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jun 2018 14:58:10 +0100 Subject: [PATCH 44/90] asm-generic: unistd.h: Wire up sys_rseq The new rseq call arrived in 4.18-rc1, so provide it in the asm-generic unistd.h for architectures such 
as arm64. Acked-by: Arnd Bergmann Acked-by: Mathieu Desnoyers Signed-off-by: Will Deacon --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 42990676a55e..df4bedb9b01c 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -734,9 +734,11 @@ __SYSCALL(__NR_pkey_free, sys_pkey_free) __SYSCALL(__NR_statx, sys_statx) #define __NR_io_pgetevents 292 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) +#define __NR_rseq 293 +__SYSCALL(__NR_rseq, sys_rseq) #undef __NR_syscalls -#define __NR_syscalls 293 +#define __NR_syscalls 294 /* * 32 bit systems traditionally used different From e87a4a92fba3721eb06ba8d061b550e09e3d063a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Jul 2018 11:37:40 +0100 Subject: [PATCH 45/90] Revert "arm64: fix infinite stacktrace" This reverts commit 7e7df71fd57ff2894d96abb0080922bf39460a79. When unwinding out of the IRQ stack and onto the interrupted EL1 stack, we cannot rely on the frame pointer being strictly increasing, as this could terminate the backtrace early depending on how the stacks have been allocated. Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index e160ca123da3..d5718a060672 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -56,9 +56,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); - if (frame->fp <= fp) - return -EINVAL; - #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { From 3eb6f1f9e6364d5eca1ddf3c9e963c5916069190 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:36 +0100 Subject: [PATCH 46/90] arm64: consistently use unsigned long for thread flags In do_notify_resume, we manipulate thread_flags as a 32-bit unsigned int, whereas thread_info::flags is a 64-bit unsigned long, and elsewhere (e.g. in the entry assembly) we manipulate the flags as a 64-bit quantity. For consistency, and to avoid problems if we end up with more than 32 flags, let's make do_notify_resume take the flags as a 64-bit unsigned long. Signed-off-by: Mark Rutland Reviewed-by: Dave Martin Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e3b1d1b0aee8..5a8df96dc4ab 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -912,7 +912,7 @@ static void do_signal(struct pt_regs *regs) } asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned int thread_flags) + unsigned long thread_flags) { /* * The assembly code enters us with IRQs off, but it hasn't From 1c312e84c2d71da4101754fa6118f703f7473e01 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:37 +0100 Subject: [PATCH 47/90] arm64: move SCTLR_EL{1,2} assertions to Currently we assert that the SCTLR_EL{1,2}_{SET,CLEAR} bits are self-consistent with an assertion in config_sctlr_el1(). This is a bit unusual, since config_sctlr_el1() doesn't make use of these definitions, and is far away from the definitions themselves. 
We can use the CPP #error directive to have equivalent assertions in , next to the definitions of the set/clear bits, which is a bit clearer and simpler. At the same time, lets fill in the upper 32 bits for both registers in their respective RES0 definitions. This could be a little nicer with GENMASK_ULL(63, 32), but this currently lives in , which cannot safely be included from assembly, as can. Note the when the preprocessor evaluates an expression for an #if directive, all signed or unsigned values are treated as intmax_t or uintmax_t respectively. To avoid ambiguity, we define explicitly define the mask of all 64 bits. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Dave Martin Cc: James Morse Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a8f84812c6e8..fefc17dae8ee 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -436,7 +436,8 @@ #define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ - (1 << 27) | (1 << 30) | (1 << 31)) + (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffffffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -452,9 +453,9 @@ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) -/* Check all the bits are accounted for */ -#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0) - +#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL2 set/clear bits" +#endif /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) @@ -473,7 +474,8 @@ #define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ - (1 << 27) | (1 << 30) | (1 << 31)) + (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffffffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -492,8 +494,9 @@ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ SCTLR_EL1_RES0) -/* Check all the bits are accounted for */ -#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) +#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL1 set/clear bits" +#endif /* id_aa64isar0 */ #define ID_AA64ISAR0_TS_SHIFT 52 @@ -743,9 +746,6 @@ static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; - SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS; - SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS; - val = read_sysreg(sctlr_el1); val &= ~clear; val |= set; From 25be597ada0b49d2748ab520a78a28c1764d69e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:38 +0100 Subject: [PATCH 48/90] arm64: kill config_sctlr_el1() Now that we have sysreg_clear_set(), we can consistently use this instead of config_sctlr_el1(). 
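For reference, sysreg_clear_set() is a read-modify-write helper in <asm/sysreg.h>; a behavioural sketch (not the exact macro) is shown below. The write is skipped when the value is unchanged, so callers do not need to read the register first.

/* sysreg_clear_set(sctlr_el1, clear, set) behaves roughly like: */
u64 old = read_sysreg(sctlr_el1);
u64 new = (old & ~(u64)clear) | set;

if (new != old)			/* skip the msr when nothing changes */
	write_sysreg(new, sctlr_el1);
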
Signed-off-by: Mark Rutland Reviewed-by: Dave Martin Acked-by: Catalin Marinas Cc: James Morse Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/kernel/armv8_deprecated.c | 8 ++++---- arch/arm64/kernel/cpu_errata.c | 3 +-- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fefc17dae8ee..e205ec8489e9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -742,16 +742,6 @@ asm( write_sysreg(__scs_new, sysreg); \ } while (0) -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - val = read_sysreg(sctlr_el1); - val &= ~clear; - val |= set; - write_sysreg(val, sctlr_el1); -} - #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index d2c0938ccaec..92be1d12d590 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -511,9 +511,9 @@ ret: static int cp15_barrier_set_hw_mode(bool enable) { if (enable) - config_sctlr_el1(0, SCTLR_EL1_CP15BEN); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN); else - config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0); return 0; } @@ -548,9 +548,9 @@ static int setend_set_hw_mode(bool enable) return -EINVAL; if (enable) - config_sctlr_el1(SCTLR_EL1_SED, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0); else - config_sctlr_el1(0, SCTLR_EL1_SED); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED); return 0; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 94876ab071c7..dec10898d688 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -82,8 +82,7 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - /* Clear SCTLR_EL1.UCT */ - config_sctlr_el1(SCTLR_EL1_UCT, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d399d459397b..c27292703bd1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -411,7 +411,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { - config_sctlr_el1(SCTLR_EL1_UCI, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); } #define __user_cache_maint(insn, address, res) \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b8eecc7b9531..ea591c9e5144 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -879,7 +879,7 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) */ WARN_ON_ONCE(in_interrupt()); - config_sctlr_el1(SCTLR_EL1_SPAN, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); asm(SET_PSTATE_PAN(1)); } #endif /* CONFIG_ARM64_PAN */ From 8d370933faecec098acb99fbf317cf9dfa9ee995 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:39 +0100 Subject: [PATCH 49/90] arm64: kill change_cpacr() Now that we have sysreg_clear_set(), we can use this instead of change_cpacr(). Note that the order of the set and clear arguments differs between change_cpacr() and sysreg_clear_set(), so these are flipped as part of the conversion. 
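Concretely, for the two helpers converted below:

/* before: value to set first, then the mask of affected bits */
change_cpacr(0, CPACR_EL1_ZEN_EL0EN);				/* disable */
change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN);		/* enable  */

/* after: bits to clear first, then bits to set */
sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);		/* disable */
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);		/* enable  */
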
Also, sve_user_enable() redundantly clears CPACR_EL1_ZEN_EL0EN before setting it; this is removed for clarity. Signed-off-by: Mark Rutland Reviewed-by: Dave Martin Acked-by: Catalin Marinas Cc: James Morse Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 84c68b14f1b2..a98a7f96aff1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -159,23 +159,14 @@ static void sve_free(struct task_struct *task) __sve_free(task); } -static void change_cpacr(u64 val, u64 mask) -{ - u64 cpacr = read_sysreg(CPACR_EL1); - u64 new = (cpacr & ~mask) | val; - - if (new != cpacr) - write_sysreg(new, CPACR_EL1); -} - static void sve_user_disable(void) { - change_cpacr(0, CPACR_EL1_ZEN_EL0EN); + sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0); } static void sve_user_enable(void) { - change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN); + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN); } /* From f9209e26293300db80a57a6bf2f71ccb26ad45db Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:40 +0100 Subject: [PATCH 50/90] arm64: move sve_user_{enable,disable} to In subsequent patches, we'll want to make use of sve_user_enable() and sve_user_disable() outside of kernel/fpsimd.c. Let's move these to where we can make use of them. To avoid ifdeffery in sequences like: if (system_supports_sve() && some_condition) sve_user_disable(); ... empty stubs are provided when support for SVE is not enabled. Note that system_supports_sve() contains as IS_ENABLED(CONFIG_ARM64_SVE), so the sve_user_disable() call should be optimized away entirely when CONFIG_ARM64_SVE is not selected. To ensure that this is the case, the stub definitions contain a BUILD_BUG(), as we do for other stubs for which calls should always be optimized away when the relevant config option is not selected. At the same time, the include list of is sorted while adding . Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Dave Martin Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 17 ++++++++++++++++- arch/arm64/kernel/fpsimd.c | 10 ---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index fa92747a49c8..dd1ad3950ef5 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -16,13 +16,15 @@ #ifndef __ASM_FP_H #define __ASM_FP_H -#include #include +#include #include #include +#include #ifndef __ASSEMBLY__ +#include #include #include #include @@ -102,6 +104,16 @@ extern int sve_set_vector_length(struct task_struct *task, extern int sve_set_current_vl(unsigned long arg); extern int sve_get_current_vl(void); +static inline void sve_user_disable(void) +{ + sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0); +} + +static inline void sve_user_enable(void) +{ + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN); +} + /* * Probing and setup functions. * Calls to these functions must be serialised with one another. 
@@ -128,6 +140,9 @@ static inline int sve_get_current_vl(void) return -EINVAL; } +static inline void sve_user_disable(void) { BUILD_BUG(); } +static inline void sve_user_enable(void) { BUILD_BUG(); } + static inline void sve_init_vq_map(void) { } static inline void sve_update_vq_map(void) { } static inline int sve_verify_vq_map(void) { return 0; } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a98a7f96aff1..58c53bc96928 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -159,16 +159,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } -static void sve_user_disable(void) -{ - sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0); -} - -static void sve_user_enable(void) -{ - sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN); -} - /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored From 3085e1645e23888125224f66a710077cdb03106a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:41 +0100 Subject: [PATCH 51/90] arm64: remove sigreturn wrappers The arm64 sigreturn* syscall handlers are non-standard. Rather than taking a number of user parameters in registers as per the AAPCS, they expect the pt_regs as their sole argument. To make this work, we override the syscall definitions to invoke wrappers written in assembly, which mov the SP into x0, and branch to their respective C functions. On other architectures (such as x86), the sigreturn* functions take no argument and instead use current_pt_regs() to acquire the user registers. This requires less boilerplate code, and allows for other features such as interposing C code in this path. This patch takes the same approach for arm64. Signed-off-by: Mark Rutland Tentatively-reviewed-by: Dave Martin Reviewed-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/arm64/kernel/entry.S | 8 -------- arch/arm64/kernel/entry32.S | 10 ---------- arch/arm64/kernel/signal.c | 3 ++- arch/arm64/kernel/signal32.c | 6 ++++-- arch/arm64/kernel/sys.c | 3 +-- arch/arm64/kernel/sys32.c | 4 ++-- 7 files changed, 11 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 0fdc7ef8a776..023d64af8339 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -260,7 +260,7 @@ __SYSCALL(117, sys_ni_syscall) #define __NR_fsync 118 __SYSCALL(__NR_fsync, sys_fsync) #define __NR_sigreturn 119 -__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper) +__SYSCALL(__NR_sigreturn, compat_sys_sigreturn) #define __NR_clone 120 __SYSCALL(__NR_clone, sys_clone) #define __NR_setdomainname 121 @@ -368,7 +368,7 @@ __SYSCALL(__NR_getresgid, sys_getresgid16) #define __NR_prctl 172 __SYSCALL(__NR_prctl, sys_prctl) #define __NR_rt_sigreturn 173 -__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper) +__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn) #define __NR_rt_sigaction 174 __SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) #define __NR_rt_sigprocmask 175 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1eda9e1a1f4a..201809537343 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1139,14 +1139,6 @@ __entry_tramp_data_start: #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ -/* - * Special system call wrappers. 
- */ -ENTRY(sys_rt_sigreturn_wrapper) - mov x0, sp - b sys_rt_sigreturn -ENDPROC(sys_rt_sigreturn_wrapper) - /* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index f332d5d1f6b4..f9461696dde4 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -30,16 +30,6 @@ * System call wrappers for the AArch32 compatibility layer. */ -ENTRY(compat_sys_sigreturn_wrapper) - mov x0, sp - b compat_sys_sigreturn -ENDPROC(compat_sys_sigreturn_wrapper) - -ENTRY(compat_sys_rt_sigreturn_wrapper) - mov x0, sp - b compat_sys_rt_sigreturn -ENDPROC(compat_sys_rt_sigreturn_wrapper) - ENTRY(compat_sys_statfs64_wrapper) mov w3, #84 cmp w1, #88 diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 5a8df96dc4ab..b6f3c3d76901 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -539,8 +539,9 @@ static int restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +asmlinkage long sys_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index fca761be18c8..9aac1253e0a6 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -285,8 +285,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) +asmlinkage int compat_sys_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct compat_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -315,8 +316,9 @@ badframe: return 0; } -asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +asmlinkage int compat_sys_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct compat_rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 72981bae10eb..31045f3fed92 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -48,8 +48,7 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) /* * Wrappers to pass the pt_regs argument. */ -asmlinkage long sys_rt_sigreturn_wrapper(void); -#define sys_rt_sigreturn sys_rt_sigreturn_wrapper +asmlinkage long sys_rt_sigreturn(void); #define sys_personality sys_arm64_personality #undef __SYSCALL diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index a40b1343b819..1ef103c95410 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -25,8 +25,8 @@ #include #include -asmlinkage long compat_sys_sigreturn_wrapper(void); -asmlinkage long compat_sys_rt_sigreturn_wrapper(void); +asmlinkage long compat_sys_sigreturn(void); +asmlinkage long compat_sys_rt_sigreturn(void); asmlinkage long compat_sys_statfs64_wrapper(void); asmlinkage long compat_sys_fstatfs64_wrapper(void); asmlinkage long compat_sys_pread64_wrapper(void); From 27d83e68f307ee55b70fdfdc7a9ba3f25f276189 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:42 +0100 Subject: [PATCH 52/90] arm64: introduce syscall_fn_t In preparation for invoking arbitrary syscalls from C code, let's define a type for an arbitrary syscall, matching the parameter passing rules of the AAPCS. 
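As an example of what the type buys us, dispatch through the table becomes a plain indexed call passing the six argument registers saved in pt_regs; this is essentially what __invoke_syscall() does in the next patch:

/* illustrative only: invoke syscall scno with the AAPCS arguments
 * (x0-x5) as saved in pt_regs at kernel entry */
syscall_fn_t fn = sys_call_table[scno];
long ret = fn(regs->regs[0], regs->regs[1], regs->regs[2],
	      regs->regs[3], regs->regs[4], regs->regs[5]);
regs->regs[0] = ret;
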
There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/syscall.h | 6 +++++- arch/arm64/kernel/sys.c | 8 +++++--- arch/arm64/kernel/sys32.c | 8 +++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 709a574468f0..50841cb1bfa9 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -20,7 +20,11 @@ #include #include -extern const void *sys_call_table[]; +typedef long (*syscall_fn_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); + +extern const syscall_fn_t sys_call_table[]; static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 31045f3fed92..7f0907261269 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,7 +25,9 @@ #include #include #include + #include +#include asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -52,13 +54,13 @@ asmlinkage long sys_rt_sigreturn(void); #define sys_personality sys_arm64_personality #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, /* * The sys_call_table array must be 4K aligned to be accessible from * kernel/entry.S. */ -void * const sys_call_table[__NR_syscalls] __aligned(4096) = { - [0 ... __NR_syscalls - 1] = sys_ni_syscall, +const syscall_fn_t sys_call_table[__NR_syscalls] __aligned(4096) = { + [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index 1ef103c95410..d993214118aa 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -25,6 +25,8 @@ #include #include +#include + asmlinkage long compat_sys_sigreturn(void); asmlinkage long compat_sys_rt_sigreturn(void); asmlinkage long compat_sys_statfs64_wrapper(void); @@ -40,13 +42,13 @@ asmlinkage long compat_sys_fallocate_wrapper(void); asmlinkage long compat_sys_mmap2_wrapper(void); #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, /* * The sys_call_table array must be 4K aligned to be accessible from * kernel/entry.S. */ -void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { - [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall, +const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { + [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include }; From 4141c857fd09dbed480f021b3eece4f46c653161 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:43 +0100 Subject: [PATCH 53/90] arm64: convert raw syscall invocation to C As a first step towards invoking syscalls with a pt_regs argument, convert the raw syscall invocation logic to C. We end up with a bit more register shuffling, but the unified invocation logic means we can unify the tracing paths, too. Previously, assembly had to open-code calls to ni_sys() when the system call number was out-of-bounds for the relevant syscall table. This case is now handled by invoke_syscall(), and the assembly no longer need to handle this case explicitly. This allows the tracing paths to be simplified and unified, as we no longer need the __ni_sys_trace path and the __sys_trace_return label. 
This only converts the invocation of the syscall. The rest of the syscall triage and tracing is left in assembly for now, and will be converted in subsequent patches. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 3 ++- arch/arm64/kernel/entry.S | 36 ++++++++-------------------- arch/arm64/kernel/syscall.c | 47 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 16 ------------- 4 files changed, 59 insertions(+), 43 deletions(-) create mode 100644 arch/arm64/kernel/syscall.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 0025f8691046..4e24d2244bd1 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,7 +18,8 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ - smp.o smp_spin_table.o topology.o smccc-call.o + smp.o smp_spin_table.o topology.o smccc-call.o \ + syscall.o extra-$(CONFIG_EFI) := efi-entry.o diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 201809537343..2045aec79ccb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -903,7 +903,6 @@ ENDPROC(el0_error) */ ret_fast_syscall: disable_daif - str x0, [sp, #S_X0] // returned x0 #ifndef CONFIG_DEBUG_RSEQ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK @@ -978,15 +977,11 @@ el0_svc_naked: // compat entry point tst x16, #_TIF_SYSCALL_WORK // check for syscall hooks b.ne __sys_trace - cmp wscno, wsc_nr // check upper syscall limit - b.hs ni_sys - mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine - b ret_fast_syscall -ni_sys: mov x0, sp - bl do_ni_syscall + mov w1, wscno + mov w2, wsc_nr + mov x3, stbl + bl invoke_syscall b ret_fast_syscall ENDPROC(el0_svc) @@ -1003,29 +998,18 @@ __sys_trace: bl syscall_trace_enter cmp w0, #NO_SYSCALL // skip the syscall? 
b.eq __sys_trace_return_skipped - mov wscno, w0 // syscall number (possibly new) - mov x1, sp // pointer to regs - cmp wscno, wsc_nr // check upper syscall limit - b.hs __ni_sys_trace - ldp x0, x1, [sp] // restore the syscall args - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine -__sys_trace_return: - str x0, [sp, #S_X0] // save returned x0 + mov x0, sp + mov w1, wscno + mov w2, wsc_nr + mov x3, stbl + bl invoke_syscall + __sys_trace_return_skipped: mov x0, sp bl syscall_trace_exit b ret_to_user -__ni_sys_trace: - mov x0, sp - bl do_ni_syscall - b __sys_trace_return - .popsection // .entry.text #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c new file mode 100644 index 000000000000..93d36f22647e --- /dev/null +++ b/arch/arm64/kernel/syscall.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include + +long compat_arm_syscall(struct pt_regs *regs); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT + long ret; + if (is_compat_task()) { + ret = compat_arm_syscall(regs); + if (ret != -ENOSYS) + return ret; + } +#endif + + return sys_ni_syscall(); +} + +static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn) +{ + return syscall_fn(regs->regs[0], regs->regs[1], regs->regs[2], + regs->regs[3], regs->regs[4], regs->regs[5]); +} + +asmlinkage void invoke_syscall(struct pt_regs *regs, unsigned int scno, + unsigned int sc_nr, + const syscall_fn_t syscall_table[]) +{ + long ret; + + if (scno < sc_nr) { + syscall_fn_t syscall_fn; + syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)]; + ret = __invoke_syscall(regs, syscall_fn); + } else { + ret = do_ni_syscall(regs); + } + + regs->regs[0] = ret; +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c27292703bd1..039e9ff379cc 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -547,22 +547,6 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) do_undefinstr(regs); } -long compat_arm_syscall(struct pt_regs *regs); - -asmlinkage long do_ni_syscall(struct pt_regs *regs) -{ -#ifdef CONFIG_COMPAT - long ret; - if (is_compat_task()) { - ret = compat_arm_syscall(regs); - if (ret != -ENOSYS) - return ret; - } -#endif - - return sys_ni_syscall(); -} - static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", From f37099b6992a0b818c7b51b899e435f4006a9f90 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:44 +0100 Subject: [PATCH 54/90] arm64: convert syscall trace logic to C Currently syscall tracing is a tricky assembly state machine, which can be rather difficult to follow, and even harder to modify. Before we start fiddling with it for pt_regs syscalls, let's convert it to C. This is not intended to have any functional change. 
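One behaviour the C version must preserve is the skipped-syscall case: a ptrace tracer stopped at syscall entry can rewrite the syscall number to -1 (NO_SYSCALL), after which the kernel bypasses the invocation and runs only the syscall-exit hook. A sketch of the tracer side, using the pre-existing NT_ARM_SYSTEM_CALL regset rather than anything added here:

#include <elf.h>		/* NT_ARM_SYSTEM_CALL */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Illustrative only: force the stopped tracee's pending syscall number
 * to -1 so that invoke_syscall() is skipped. */
static long cancel_syscall(pid_t pid)
{
	int nr = -1;
	struct iovec iov = { .iov_base = &nr, .iov_len = sizeof(nr) };

	return ptrace(PTRACE_SETREGSET, pid,
		      (void *)NT_ARM_SYSTEM_CALL, &iov);
}
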
Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 55 ++--------------------------------- arch/arm64/kernel/syscall.c | 57 +++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2045aec79ccb..05b9f03f3e00 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -896,26 +896,6 @@ el0_error_naked: b ret_to_user ENDPROC(el0_error) - -/* - * This is the fast syscall return path. We do as little as possible here, - * and this includes saving x0 back into the kernel stack. - */ -ret_fast_syscall: - disable_daif -#ifndef CONFIG_DEBUG_RSEQ - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing - and x2, x1, #_TIF_SYSCALL_WORK - cbnz x2, ret_fast_syscall_trace - and x2, x1, #_TIF_WORK_MASK - cbnz x2, work_pending - enable_step_tsk x1, x2 - kernel_exit 0 -#endif -ret_fast_syscall_trace: - enable_daif - b __sys_trace_return_skipped // we already saved x0 - /* * Ok, we need to do extra processing, enter the slow path. */ @@ -971,44 +951,13 @@ alternative_else_nop_endif #endif el0_svc_naked: // compat entry point - stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number - enable_daif - ct_user_exit 1 - - tst x16, #_TIF_SYSCALL_WORK // check for syscall hooks - b.ne __sys_trace mov x0, sp mov w1, wscno mov w2, wsc_nr mov x3, stbl - bl invoke_syscall - b ret_fast_syscall -ENDPROC(el0_svc) - - /* - * This is the really slow path. We're going to be doing context - * switches, and waiting for our parent to respond. - */ -__sys_trace: - cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? - b.ne 1f - mov x0, #-ENOSYS // set default errno if so - str x0, [sp, #S_X0] -1: mov x0, sp - bl syscall_trace_enter - cmp w0, #NO_SYSCALL // skip the syscall? 
- b.eq __sys_trace_return_skipped - - mov x0, sp - mov w1, wscno - mov w2, wsc_nr - mov x3, stbl - bl invoke_syscall - -__sys_trace_return_skipped: - mov x0, sp - bl syscall_trace_exit + bl el0_svc_common b ret_to_user +ENDPROC(el0_svc) .popsection // .entry.text diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 93d36f22647e..82098e6f6aa3 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include #include +#include #include +#include long compat_arm_syscall(struct pt_regs *regs); @@ -29,9 +33,9 @@ static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn) regs->regs[3], regs->regs[4], regs->regs[5]); } -asmlinkage void invoke_syscall(struct pt_regs *regs, unsigned int scno, - unsigned int sc_nr, - const syscall_fn_t syscall_table[]) +static void invoke_syscall(struct pt_regs *regs, unsigned int scno, + unsigned int sc_nr, + const syscall_fn_t syscall_table[]) { long ret; @@ -45,3 +49,50 @@ asmlinkage void invoke_syscall(struct pt_regs *regs, unsigned int scno, regs->regs[0] = ret; } + +static inline bool has_syscall_work(unsigned long flags) +{ + return unlikely(flags & _TIF_SYSCALL_WORK); +} + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + +asmlinkage void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, + const syscall_fn_t syscall_table[]) +{ + unsigned long flags = current_thread_info()->flags; + + regs->orig_x0 = regs->regs[0]; + regs->syscallno = scno; + + local_daif_restore(DAIF_PROCCTX); + user_exit(); + + if (has_syscall_work(flags)) { + /* set default errno for user-issued syscall(-1) */ + if (scno == NO_SYSCALL) + regs->regs[0] = -ENOSYS; + scno = syscall_trace_enter(regs); + if (scno == NO_SYSCALL) + goto trace_exit; + } + + invoke_syscall(regs, scno, sc_nr, syscall_table); + + /* + * The tracing status may have changed under our feet, so we have to + * check again. However, if we were tracing entry, then we always trace + * exit regardless, as the old entry assembly did. + */ + if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { + local_daif_mask(); + flags = current_thread_info()->flags; + if (!has_syscall_work(flags)) + return; + local_daif_restore(DAIF_PROCCTX); + } + +trace_exit: + syscall_trace_exit(regs); +} From 3b7142752e4bee153df6db4a76ca104ef0d7c0b4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:45 +0100 Subject: [PATCH 55/90] arm64: convert native/compat syscall entry to C Now that the syscall invocation logic is in C, we can migrate the rest of the syscall entry logic over, so that the entry assembly needn't look at the register values at all. The SVE reset across syscall logic now unconditionally clears TIF_SVE, but sve_user_disable() will only write back to CPACR_EL1 when SVE is actually enabled. 
Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Reviewed-by: Dave Martin Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/syscall.h | 4 +++ arch/arm64/kernel/entry.S | 42 +++----------------------------- arch/arm64/kernel/syscall.c | 37 ++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 50841cb1bfa9..b83d0e6980a3 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -26,6 +26,10 @@ typedef long (*syscall_fn_t)(unsigned long, unsigned long, extern const syscall_fn_t sys_call_table[]; +#ifdef CONFIG_COMPAT +extern const syscall_fn_t compat_sys_call_table[]; +#endif + static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 05b9f03f3e00..156c4e3fd1a4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -720,14 +720,9 @@ el0_sync_compat: b.ge el0_dbg b el0_inv el0_svc_compat: - /* - * AArch32 syscall handling - */ - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, compat_sys_call_table // load compat syscall table pointer - mov wscno, w7 // syscall number in w7 (r7) - mov wsc_nr, #__NR_compat_syscalls - b el0_svc_naked + mov x0, sp + bl el0_svc_compat_handler + b ret_to_user .align 6 el0_irq_compat: @@ -925,37 +920,8 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, sys_call_table // load syscall table pointer - mov wscno, w8 // syscall number in w8 - mov wsc_nr, #__NR_syscalls - -#ifdef CONFIG_ARM64_SVE -alternative_if_not ARM64_SVE - b el0_svc_naked -alternative_else_nop_endif - tbz x16, #TIF_SVE, el0_svc_naked // Skip unless TIF_SVE set: - bic x16, x16, #_TIF_SVE // discard SVE state - str x16, [tsk, #TSK_TI_FLAGS] - - /* - * task_fpsimd_load() won't be called to update CPACR_EL1 in - * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only - * happens if a context switch or kernel_neon_begin() or context - * modification (sigreturn, ptrace) intervenes. 
- * So, ensure that CPACR_EL1 is already correct for the fast-path case: - */ - mrs x9, cpacr_el1 - bic x9, x9, #CPACR_EL1_ZEN_EL0EN // disable SVE for el0 - msr cpacr_el1, x9 // synchronised by eret to el0 -#endif - -el0_svc_naked: // compat entry point mov x0, sp - mov w1, wscno - mov w2, wsc_nr - mov x3, stbl - bl el0_svc_common + bl el0_svc_handler b ret_to_user ENDPROC(el0_svc) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 82098e6f6aa3..7ff2d7b9f517 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include long compat_arm_syscall(struct pt_regs *regs); @@ -58,8 +60,8 @@ static inline bool has_syscall_work(unsigned long flags) int syscall_trace_enter(struct pt_regs *regs); void syscall_trace_exit(struct pt_regs *regs); -asmlinkage void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, - const syscall_fn_t syscall_table[]) +static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, + const syscall_fn_t syscall_table[]) { unsigned long flags = current_thread_info()->flags; @@ -96,3 +98,34 @@ asmlinkage void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, trace_exit: syscall_trace_exit(regs); } + +static inline void sve_user_discard(void) +{ + if (!system_supports_sve()) + return; + + clear_thread_flag(TIF_SVE); + + /* + * task_fpsimd_load() won't be called to update CPACR_EL1 in + * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only + * happens if a context switch or kernel_neon_begin() or context + * modification (sigreturn, ptrace) intervenes. + * So, ensure that CPACR_EL1 is already correct for the fast-path case. + */ + sve_user_disable(); +} + +asmlinkage void el0_svc_handler(struct pt_regs *regs) +{ + sve_user_discard(); + el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); +} + +#ifdef CONFIG_COMPAT +asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +{ + el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, + compat_sys_call_table); +} +#endif From d9be03256d745360a859b11091e5a43f7828382b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:46 +0100 Subject: [PATCH 56/90] arm64: don't restore GPRs when context tracking Now that syscalls are invoked with pt_regs, we no longer need to ensure that the argument regsiters are live in the entry assembly, and it's fine to not restore them after context_tracking_user_exit() has corrupted them. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 156c4e3fd1a4..c41b84d06644 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -41,19 +41,9 @@ * Context tracking subsystem. Used to instrument transitions * between user and kernel mode. */ - .macro ct_user_exit, syscall = 0 + .macro ct_user_exit #ifdef CONFIG_CONTEXT_TRACKING bl context_tracking_user_exit - .if \syscall == 1 - /* - * Save/restore needed during syscalls. Restore syscall arguments from - * the values already saved on stack during kernel_entry. 
- */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - .endif #endif .endm From 99ed3ed08d629eb0db6866d266e2bbe35efa5f9d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:47 +0100 Subject: [PATCH 57/90] arm64: don't reload GPRs after apply_ssbd Now that all of the syscall logic works on the saved pt_regs, apply_ssbd can safely corrupt x0-x3 in the entry paths, and we no longer need to restore them. So let's remove the logic doing so. With that logic gone, we can fold the branch target into the macro, so that callers need not deal with this. GAS provides \@, which provides a unique value per macro invocation, which we can use to create a unique label. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c41b84d06644..22b240da949b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -130,20 +130,21 @@ alternative_else_nop_endif // This macro corrupts x0-x3. It is the caller's duty // to save/restore them if required. - .macro apply_ssbd, state, targ, tmp1, tmp2 + .macro apply_ssbd, state, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD alternative_cb arm64_enable_wa2_handling - b \targ + b .L__asm_ssbd_skip\@ alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 - cbz \tmp2, \targ + cbz \tmp2, .L__asm_ssbd_skip\@ ldr \tmp2, [tsk, #TSK_TI_FLAGS] - tbnz \tmp2, #TIF_SSBD, \targ + tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit nop // Patched to SMC/HVC #0 alternative_cb_end +.L__asm_ssbd_skip\@: #endif .endm @@ -173,13 +174,7 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. - apply_ssbd 1, 1f, x22, x23 - -#ifdef CONFIG_ARM64_SSBD - ldp x0, x1, [sp, #16 * 0] - ldp x2, x3, [sp, #16 * 1] -#endif -1: + apply_ssbd 1, x22, x23 mov x29, xzr // fp pointed to user-space .else @@ -321,8 +316,7 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: - apply_ssbd 0, 5f, x0, x1 -5: + apply_ssbd 0, x0, x1 .endif msr elr_el1, x21 // set up the return data From baaa7237fecc5ada99e518f38541126977ca7ff4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:48 +0100 Subject: [PATCH 58/90] arm64: zero GPRs upon entry from EL0 We can zero GPRs x0 - x29 upon entry from EL0 to make it harder for userspace to control values consumed by speculative gadgets. We don't blat x30, since this is stashed much later, and we'll blat it before invoking C code. 
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 22b240da949b..d1440f84668b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -53,6 +53,12 @@ #endif .endm + .macro clear_gp_regs + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 + mov x\n, xzr + .endr + .endm + /* * Bad Abort numbers *----------------- @@ -169,6 +175,7 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 + clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug @@ -176,7 +183,6 @@ alternative_cb_end apply_ssbd 1, x22, x23 - mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk From 80d63bc39f9ace9c1d2bef7f921d2f3ef2037d4b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:49 +0100 Subject: [PATCH 59/90] arm64: drop alignment from syscall tables Our syscall tables are aligned to 4096 bytes, which allowed their addresses to be generated with a single adrp in entry.S. This has the unfortunate property of wasting space in .rodata for the necessary padding. Now that the address is generated by C code, we can rely on the compiler to do the right thing, and drop the alignemnt. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/sys.c | 6 +----- arch/arm64/kernel/sys32.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 7f0907261269..df8180175515 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -56,11 +56,7 @@ asmlinkage long sys_rt_sigreturn(void); #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, -/* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. - */ -const syscall_fn_t sys_call_table[__NR_syscalls] __aligned(4096) = { +const syscall_fn_t sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index d993214118aa..4ddeaae90128 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -44,11 +44,7 @@ asmlinkage long compat_sys_mmap2_wrapper(void); #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, -/* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. - */ -const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { +const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include }; From bf1c77b4644f46d2986b7ca5e43e012f0fc8984b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:50 +0100 Subject: [PATCH 60/90] kernel: add ksys_personality() Using this helper allows us to avoid the in-kernel call to the sys_personality() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_personality(). Since ksys_personality is trivial, it is implemented directly in , as we do for ksys_close() and friends. 
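For illustration only: since the helper keeps sys_personality()'s calling convention, passing 0xffffffff merely queries the current personality, so a hypothetical in-kernel caller that only wants to read the value could look like this sketch:

    static unsigned int query_personality(void)
    {
            /* 0xffffffff leaves the personality unchanged and returns the old value */
            return ksys_personality(0xffffffff);
    }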
This helper is necessary to enable conversion of arm64's syscall handling to use pt_regs wrappers. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Cc: Al Viro Cc: Christoph Hellwig Cc: Dave Martin Signed-off-by: Will Deacon --- include/linux/syscalls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a368a68cb667..abfe12d8a9c5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -80,6 +80,7 @@ union bpf_attr; #include #include #include +#include #include #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER @@ -1281,4 +1282,14 @@ static inline long ksys_truncate(const char __user *pathname, loff_t length) return do_sys_truncate(pathname, length); } +static inline unsigned int ksys_personality(unsigned int personality) +{ + unsigned int old = current->personality; + + if (personality != 0xffffffff) + set_personality(personality); + + return old; +} + #endif From 9b54bf9d6a5b30e2cc22b18793e9a4158c5b4882 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:51 +0100 Subject: [PATCH 61/90] kernel: add kcompat_sys_{f,}statfs64() Using this helper allows us to avoid the in-kernel calls to the compat_sys_{f,}statfs64() sycalls, as are necessary for parameter mangling in arm64's compat handling. Following the example of ksys_* functions, kcompat_sys_* functions are intended to be a drop-in replacement for their compat_sys_* counterparts, with the same calling convention. This is necessary to enable conversion of arm64's syscall handling to use pt_regs wrappers. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Will Deacon --- fs/statfs.c | 14 ++++++++++++-- include/linux/compat.h | 11 +++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/statfs.c b/fs/statfs.c index 5b2a24f0f263..f0216629621d 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) +int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf) { struct kstatfs tmp; int error; @@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s return error; } -COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + return kcompat_sys_statfs64(pathname, sz, buf); +} + +int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf) { struct kstatfs tmp; int error; @@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co return error; } +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + return kcompat_sys_fstatfs64(fd, sz, buf); +} + /* * This is a copy of sys_ustat, just dealing with a structure layout. 
* Given how simple this syscall is that apporach is more maintainable diff --git a/include/linux/compat.h b/include/linux/compat.h index c68acc47da57..43f4ed44c5d5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -1028,6 +1028,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec) return ctv; } +/* + * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz()) + * directly. Instead, use one of the functions which work equivalently, such + * as the kcompat_sys_xyzyyz() functions prototyped below. + */ + +int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, + struct compat_statfs64 __user * buf); +int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, + struct compat_statfs64 __user * buf); + #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) From 3f7deccb037cc2bd2f848576439c033f81daeb6a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:52 +0100 Subject: [PATCH 62/90] arm64: remove in-kernel call to sys_personality() With pt_regs syscall wrappers, the calling convention for sys_personality() will change. Use ksys_personality(), which is functionally equivalent. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index df8180175515..f4e02f6aceb8 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -44,7 +44,7 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) if (personality(personality) == PER_LINUX32 && !system_supports_32bit_el0()) return -EINVAL; - return sys_personality(personality); + return ksys_personality(personality); } /* From bf4ce5cc23c9f8357bfd9c0328bf59685664cbc7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:53 +0100 Subject: [PATCH 63/90] arm64: use {COMPAT,}SYSCALL_DEFINE0 for sigreturn We don't currently annotate our various sigreturn functions as syscalls, as we need to do to use pt_regs syscall wrappers. Let's mark them as real syscalls. For compat_sys_sigreturn and compat_sys_rt_sigreturn, this changes the return type from int to long, matching the prototypes in sys32.c. 
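For illustration, once the arm64 pt_regs syscall wrappers added later in this series are in place, the annotation effectively boils down to the sketch below (metadata and error-injection boilerplate omitted; only the generated name and prototype matter here):

    /* SYSCALL_DEFINE0(rt_sigreturn) ends up defining: */
    asmlinkage long __arm64_sys_rt_sigreturn(void)
    {
            struct pt_regs *regs = current_pt_regs();
            /* ... restore the signal frame described by regs ... */
    }

so the syscall table can be populated with uniformly prefixed, zero-argument entry points rather than bare asmlinkage functions.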
Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 2 +- arch/arm64/kernel/signal32.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b6f3c3d76901..5dcc942906db 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -539,7 +539,7 @@ static int restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 9aac1253e0a6..24b09003f821 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -285,7 +285,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage int compat_sys_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct compat_sigframe __user *frame; @@ -316,7 +316,7 @@ badframe: return 0; } -asmlinkage int compat_sys_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct compat_rt_sigframe __user *frame; From d3516c9073b4b81410195489dc169891cd64e4cd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:54 +0100 Subject: [PATCH 64/90] arm64: use SYSCALL_DEFINE6() for mmap We don't currently annotate our mmap implementation as a syscall, as we need to do to use pt_regs syscall wrappers. Let's mark it as a real syscall. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/sys.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index f4e02f6aceb8..f249d9735f4c 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -29,9 +29,9 @@ #include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, off) { if (offset_in_page(off) != 0) return -EINVAL; From 55f849265af886aaf404cbf8a659b2c8d411fc3a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:55 +0100 Subject: [PATCH 65/90] arm64: convert compat wrappers to C In preparation for converting to pt_regs syscall wrappers, convert our existing compat wrappers to C. This will allow the pt_regs wrappers to be automatically generated, and will allow for the compat register manipulation to be folded in with the pt_regs accesses. To avoid confusion with the upcoming pt_regs wrappers and existing compat wrappers provided by core code, the C wrappers are renamed to compat_sys_aarch32_. With the assembly wrappers gone, we can get rid of entry32.S and the associated boilerplate. Note that these must call the ksys_* syscall entry points, as the usual sys_* entry points will be modified to take a single pt_regs pointer argument. 
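The fiddly part of the C wrappers is rebuilding 64-bit arguments that AArch32 passes in two 32-bit registers, with a pad register wherever the 32-bit ABI wants an even/odd register pair. The endian-aware arg_u32p()/arg_u64() helpers in the diff handle this; conceptually each wrapper is no more than the following sketch (pread64 used as the example):

    /* pos arrives split into pos_lo/pos_hi; rebuild it and hand off to the
     * ksys_* entry point, much as the old "regs_to_64" assembly did: */
    loff_t pos = ((u64)pos_hi << 32) | pos_lo;
    return ksys_pread64(fd, buf, count, pos);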
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd32.h | 22 +++--- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/entry32.S | 111 ----------------------------- arch/arm64/kernel/sys32.c | 114 +++++++++++++++++++++++++++--- 4 files changed, 115 insertions(+), 134 deletions(-) delete mode 100644 arch/arm64/kernel/entry32.S diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 023d64af8339..2cd6dcf8d246 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -382,9 +382,9 @@ __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) #define __NR_rt_sigsuspend 179 __SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) #define __NR_pread64 180 -__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper) +__SYSCALL(__NR_pread64, compat_sys_aarch32_pread64) #define __NR_pwrite64 181 -__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper) +__SYSCALL(__NR_pwrite64, compat_sys_aarch32_pwrite64) #define __NR_chown 182 __SYSCALL(__NR_chown, sys_chown16) #define __NR_getcwd 183 @@ -406,11 +406,11 @@ __SYSCALL(__NR_vfork, sys_vfork) #define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ __SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ #define __NR_mmap2 192 -__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper) +__SYSCALL(__NR_mmap2, compat_sys_aarch32_mmap2) #define __NR_truncate64 193 -__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper) +__SYSCALL(__NR_truncate64, compat_sys_aarch32_truncate64) #define __NR_ftruncate64 194 -__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper) +__SYSCALL(__NR_ftruncate64, compat_sys_aarch32_ftruncate64) #define __NR_stat64 195 __SYSCALL(__NR_stat64, sys_stat64) #define __NR_lstat64 196 @@ -472,7 +472,7 @@ __SYSCALL(223, sys_ni_syscall) #define __NR_gettid 224 __SYSCALL(__NR_gettid, sys_gettid) #define __NR_readahead 225 -__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper) +__SYSCALL(__NR_readahead, compat_sys_aarch32_readahead) #define __NR_setxattr 226 __SYSCALL(__NR_setxattr, sys_setxattr) #define __NR_lsetxattr 227 @@ -554,15 +554,15 @@ __SYSCALL(__NR_clock_getres, compat_sys_clock_getres) #define __NR_clock_nanosleep 265 __SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) #define __NR_statfs64 266 -__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper) +__SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) #define __NR_fstatfs64 267 -__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper) +__SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) #define __NR_tgkill 268 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 269 __SYSCALL(__NR_utimes, compat_sys_utimes) #define __NR_arm_fadvise64_64 270 -__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper) +__SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) #define __NR_pciconfig_iobase 271 __SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) #define __NR_pciconfig_read 272 @@ -704,7 +704,7 @@ __SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) #define __NR_splice 340 __SYSCALL(__NR_splice, sys_splice) #define __NR_sync_file_range2 341 -__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper) +__SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2) #define __NR_tee 342 __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 @@ -726,7 +726,7 @@ __SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_eventfd 351 __SYSCALL(__NR_eventfd, sys_eventfd) 
#define __NR_fallocate 352 -__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper) +__SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) #define __NR_timerfd_settime 353 __SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) #define __NR_timerfd_gettime 354 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4e24d2244bd1..95ac7374d723 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o + sys_compat.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S deleted file mode 100644 index f9461696dde4..000000000000 --- a/arch/arm64/kernel/entry32.S +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Compat system call wrappers - * - * Copyright (C) 2012 ARM Ltd. - * Authors: Will Deacon - * Catalin Marinas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include - -/* - * System call wrappers for the AArch32 compatibility layer. - */ - -ENTRY(compat_sys_statfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_statfs64 -ENDPROC(compat_sys_statfs64_wrapper) - -ENTRY(compat_sys_fstatfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_fstatfs64 -ENDPROC(compat_sys_fstatfs64_wrapper) - -/* - * Note: off_4k (w5) is always in units of 4K. If we can't do the - * requested offset because it is not page-aligned, we return -EINVAL. - */ -ENTRY(compat_sys_mmap2_wrapper) -#if PAGE_SHIFT > 12 - tst w5, #~PAGE_MASK >> 12 - b.ne 1f - lsr w5, w5, #PAGE_SHIFT - 12 -#endif - b sys_mmap_pgoff -1: mov x0, #-EINVAL - ret -ENDPROC(compat_sys_mmap2_wrapper) - -/* - * Wrappers for AArch32 syscalls that either take 64-bit parameters - * in registers or that take 32-bit parameters which require sign - * extension. 
- */ -ENTRY(compat_sys_pread64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pread64 -ENDPROC(compat_sys_pread64_wrapper) - -ENTRY(compat_sys_pwrite64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pwrite64 -ENDPROC(compat_sys_pwrite64_wrapper) - -ENTRY(compat_sys_truncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_truncate -ENDPROC(compat_sys_truncate64_wrapper) - -ENTRY(compat_sys_ftruncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_ftruncate -ENDPROC(compat_sys_ftruncate64_wrapper) - -ENTRY(compat_sys_readahead_wrapper) - regs_to_64 x1, x2, x3 - mov w2, w4 - b sys_readahead -ENDPROC(compat_sys_readahead_wrapper) - -ENTRY(compat_sys_fadvise64_64_wrapper) - mov w6, w1 - regs_to_64 x1, x2, x3 - regs_to_64 x2, x4, x5 - mov w3, w6 - b sys_fadvise64_64 -ENDPROC(compat_sys_fadvise64_64_wrapper) - -ENTRY(compat_sys_sync_file_range2_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_sync_file_range2 -ENDPROC(compat_sys_sync_file_range2_wrapper) - -ENTRY(compat_sys_fallocate_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_fallocate -ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index 4ddeaae90128..a205d4ff4776 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -22,6 +22,7 @@ */ #define __COMPAT_SYSCALL_NR +#include #include #include @@ -29,17 +30,108 @@ asmlinkage long compat_sys_sigreturn(void); asmlinkage long compat_sys_rt_sigreturn(void); -asmlinkage long compat_sys_statfs64_wrapper(void); -asmlinkage long compat_sys_fstatfs64_wrapper(void); -asmlinkage long compat_sys_pread64_wrapper(void); -asmlinkage long compat_sys_pwrite64_wrapper(void); -asmlinkage long compat_sys_truncate64_wrapper(void); -asmlinkage long compat_sys_ftruncate64_wrapper(void); -asmlinkage long compat_sys_readahead_wrapper(void); -asmlinkage long compat_sys_fadvise64_64_wrapper(void); -asmlinkage long compat_sys_sync_file_range2_wrapper(void); -asmlinkage long compat_sys_fallocate_wrapper(void); -asmlinkage long compat_sys_mmap2_wrapper(void); + +COMPAT_SYSCALL_DEFINE3(aarch32_statfs64, const char __user *, pathname, + compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + /* + * 32-bit ARM applies an OABI compatibility fixup to statfs64 and + * fstatfs64 regardless of whether OABI is in use, and therefore + * arbitrary binaries may rely upon it, so we must do the same. + * For more details, see commit: + * + * 713c481519f19df9 ("[ARM] 3108/2: old ABI compat: statfs64 and + * fstatfs64") + */ + if (sz == 88) + sz = 84; + + return kcompat_sys_statfs64(pathname, sz, buf); +} + +COMPAT_SYSCALL_DEFINE3(aarch32_fstatfs64, unsigned int, fd, compat_size_t, sz, + struct compat_statfs64 __user *, buf) +{ + /* see aarch32_statfs64 */ + if (sz == 88) + sz = 84; + + return kcompat_sys_fstatfs64(fd, sz, buf); +} + +/* + * Note: off_4k is always in units of 4K. If we can't do the + * requested offset because it is not page-aligned, we return -EINVAL. 
+ */ +COMPAT_SYSCALL_DEFINE6(aarch32_mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ + if (off_4k & (~PAGE_MASK >> 12)) + return -EINVAL; + + off_4k >>= (PAGE_SHIFT - 12); + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off_4k); +} + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define arg_u32p(name) u32, name##_hi, u32, name##_lo +#else +#define arg_u32p(name) u32, name##_lo, u32, name##_hi +#endif + +#define arg_u64(name) (((u64)name##_hi << 32) | name##_lo) + +COMPAT_SYSCALL_DEFINE6(aarch32_pread64, unsigned int, fd, char __user *, buf, + size_t, count, u32, __pad, arg_u32p(pos)) +{ + return ksys_pread64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_pwrite64, unsigned int, fd, + const char __user *, buf, size_t, count, u32, __pad, + arg_u32p(pos)) +{ + return ksys_pwrite64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname, + u32, __pad, arg_u32p(length)) +{ + return ksys_truncate(pathname, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad, + arg_u32p(length)) +{ + return ksys_ftruncate(fd, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad, + arg_u32p(offset), size_t, count) +{ + return ksys_readahead(fd, arg_u64(offset), count); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fadvise64_64, int, fd, int, advice, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_sync_file_range2, int, fd, unsigned int, flags, + arg_u32p(offset), arg_u32p(nbytes)) +{ + return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes), + flags); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); +} #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, From 4378a7d4be30ec6994702b19936f7d1465193541 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 14:56:56 +0100 Subject: [PATCH 66/90] arm64: implement syscall wrappers To minimize the risk of userspace-controlled values being used under speculation, this patch adds pt_regs based syscall wrappers for arm64, which pass the minimum set of required userspace values to syscall implementations. For each syscall, a wrapper which takes a pt_regs argument is automatically generated, and this extracts the arguments before calling the "real" syscall implementation. Each syscall has three functions generated: * __do_sys_ is the "real" syscall implementation, with the expected prototype. * __se_sys_ is the sign-extension/narrowing wrapper, inherited from common code. This takes a series of long parameters, casting each to the requisite types required by the "real" syscall implementation in __do_sys_. This wrapper *may* not be necessary on arm64 given the AAPCS rules on unused register bits, but it seemed safer to keep the wrapper for now. * __arm64__sys_ takes a struct pt_regs pointer, and extracts *only* the relevant register values, passing these on to the __se_sys_ wrapper. The syscall invocation code is updated to handle the calling convention required by __arm64__sys_, and passes a single struct pt_regs pointer. The compiler can fold the syscall implementation and its wrappers, such that the overhead of this approach is minimized. Note that we play games with sys_ni_syscall(). 
It can't be defined with SYSCALL_DEFINE0() because we must avoid the possibility of error injection. Additionally, there are a couple of locations where we need to call it from C code, and we don't (currently) have a ksys_ni_syscall(). While it has no wrapper, passing in a redundant pt_regs pointer is benign per the AAPCS. When ARCH_HAS_SYSCALL_WRAPPER is selected, no prototype is defines for sys_ni_syscall(). Since we need to treat it differently for in-kernel calls and the syscall tables, the prototype is defined as-required. The wrappers are largely the same as their x86 counterparts, but simplified as we don't have a variety of compat calling conventions that require separate stubs. Unlike x86, we have some zero-argument compat syscalls, and must define COMPAT_SYSCALL_DEFINE0() to ensure that these are also given an __arm64_compat_sys_ prefix. Signed-off-by: Mark Rutland Reviewed-by: Dominik Brodowski Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/syscall.h | 4 +- arch/arm64/include/asm/syscall_wrapper.h | 80 ++++++++++++++++++++++++ arch/arm64/kernel/sys.c | 10 ++- arch/arm64/kernel/sys32.c | 9 ++- arch/arm64/kernel/syscall.c | 5 +- 6 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 arch/arm64/include/asm/syscall_wrapper.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f4157d4a0289..e2823362c324 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -24,6 +24,7 @@ config ARM64 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK if !PREEMPT diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index b83d0e6980a3..ad8be16a39c9 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -20,9 +20,7 @@ #include #include -typedef long (*syscall_fn_t)(unsigned long, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); +typedef long (*syscall_fn_t)(struct pt_regs *regs); extern const syscall_fn_t sys_call_table[]; diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h new file mode 100644 index 000000000000..a4477e515b79 --- /dev/null +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - arm64 specific wrappers to syscall definitions + * + * Based on arch/x86/include/asm_syscall_wrapper.h + */ + +#ifndef __ASM_SYSCALL_WRAPPER_H +#define __ASM_SYSCALL_WRAPPER_H + +#define SC_ARM64_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->regs[0],,regs->regs[1],,regs->regs[2] \ + ,,regs->regs[3],,regs->regs[4],,regs->regs[5]) + +#ifdef CONFIG_COMPAT + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ + asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__arm64_compat_sys##name, ERRNO); \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs) \ + { \ + return __se_compat_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + } \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +#define COMPAT_SYSCALL_DEFINE0(sname) \ + asmlinkage long __arm64_compat_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \ + asmlinkage long __arm64_compat_sys_##sname(void) + +#define COND_SYSCALL_COMPAT(name) \ + cond_syscall(__arm64_compat_sys_##name); + +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); + +#endif /* CONFIG_COMPAT */ + +#define __SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ + { \ + return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +#ifndef SYSCALL_DEFINE0 +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __arm64_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ + asmlinkage long __arm64_sys_##sname(void) +#endif + +#ifndef COND_SYSCALL +#define COND_SYSCALL(name) cond_syscall(__arm64_sys_##name) +#endif + +#ifndef SYS_NI +#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); +#endif + +#endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index f249d9735f4c..b44065fb1616 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -50,11 +50,17 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) /* * Wrappers to pass the pt_regs argument. */ -asmlinkage long sys_rt_sigreturn(void); #define sys_personality sys_arm64_personality +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, const syscall_fn_t sys_call_table[__NR_syscalls] = { [0 ... 
__NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index a205d4ff4776..0f8bcb7de700 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -133,8 +133,15 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); } +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)sym, +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 7ff2d7b9f517..be00c85794db 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -15,6 +15,8 @@ long compat_arm_syscall(struct pt_regs *regs); +long sys_ni_syscall(void); + asmlinkage long do_ni_syscall(struct pt_regs *regs) { #ifdef CONFIG_COMPAT @@ -31,8 +33,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn) { - return syscall_fn(regs->regs[0], regs->regs[1], regs->regs[2], - regs->regs[3], regs->regs[4], regs->regs[5]); + return syscall_fn(regs); } static void invoke_syscall(struct pt_regs *regs, unsigned int scno, From 11527b3e0b577513ea318f01e392ccf7feb9d2a4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Jul 2018 15:14:23 +0100 Subject: [PATCH 67/90] arm64: Drop asmlinkage qualifier from syscall_trace_{enter,exit} syscall_trace_{enter,exit} are only called from C code, so drop the asmlinkage qualifier from their definitions. Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 489db3624606..379b2b714193 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1631,7 +1631,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, regs->regs[regno] = saved_reg; } -asmlinkage int syscall_trace_enter(struct pt_regs *regs) +int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); @@ -1649,7 +1649,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) return regs->syscallno; } -asmlinkage void syscall_trace_exit(struct pt_regs *regs) +void syscall_trace_exit(struct pt_regs *regs) { audit_syscall_exit(regs); From 14d6e289a89780377f8bb09de8926d3c62d763cd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 10 Jul 2018 19:01:22 +0100 Subject: [PATCH 68/90] arm64: fix possible spectre-v1 write in ptrace_hbp_set_event() It's possible for userspace to control idx. Sanitize idx when using it as an array index, to inhibit the potential spectre-v1 write gadget. Found by smatch. 
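The fix follows the usual <linux/nospec.h> pattern: bounds-check first, then clamp the index before it is used, so that even a mis-speculated path cannot write out of bounds. In generic form (sketch):

    if (idx >= ARRAY_SIZE(array))
            return -EINVAL;
    idx = array_index_nospec(idx, ARRAY_SIZE(array));
    array[idx] = val;       /* stays in bounds even under speculation */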
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 379b2b714193..299758f9b9c4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -277,19 +277,22 @@ static int ptrace_hbp_set_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) { - tsk->thread.debug.hbp_break[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) { - tsk->thread.debug.hbp_watch[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; break; } +out: return err; } From c931d34ea0853d41349e93f871bd3f17f1c03a6b Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 13 Jul 2018 08:30:33 -0700 Subject: [PATCH 69/90] arm64: build with baremetal linker target instead of Linux when available Not all toolchains have the baremetal elf targets, RedHat/Fedora ones in particular. So, probe for whether it's available and use the previous (linux) targets if it isn't. Reported-by: Laura Abbott Tested-by: Laura Abbott Acked-by: Masahiro Yamada Cc: Paul Kocialkowski Signed-off-by: Olof Johansson Signed-off-by: Will Deacon --- arch/arm64/Makefile | 9 +++++---- scripts/Kbuild.include | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 45272266dafb..99a1d859b8b8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,15 +60,16 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB -LD += -EB -LDFLAGS += -maarch64linuxb +# Prefer the baremetal ELF build target, but not all toolchains include +# it so fall back to the standard linux version if needed. +LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL -LD += -EL -LDFLAGS += -maarch64linux +# Same as above, prefer ELF but fall back to linux target if needed. +LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) UTS_MACHINE := aarch64 endif diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c8156d61678c..1e13f502b42f 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -163,8 +163,8 @@ cc-ldoption = $(call try-run,\ $(CC) $(1) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) # ld-option -# Usage: LDFLAGS += $(call ld-option, -X) -ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2)) +# Usage: LDFLAGS += $(call ld-option, -X, -Y) +ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2),$(3)) # ar-option # Usage: KBUILD_ARFLAGS := $(call ar-option,D) From 50d7ba36b916d0fd4687149ec143bf49c326523f Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 23 Jul 2018 10:57:28 +0900 Subject: [PATCH 70/90] arm64: export memblock_reserve()d regions via /proc/iomem There has been some confusion around what is necessary to prevent kexec overwriting important memory regions. memblock: reserve, or nomap? Only memblock nomap regions are reported via /proc/iomem, kexec's user-space doesn't know about memblock_reserve()d regions. 
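In memblock terms the difference is roughly the following sketch (base/size stand for some firmware-owned physical range; the comments describe arm64 behaviour before this patch):

    /* Range stays out of the page allocator but remains mapped, and shows
     * up only as part of plain "System RAM" in /proc/iomem: */
    memblock_reserve(base, size);

    /* Range is also left out of the linear map, and arm64 reports it
     * separately in /proc/iomem, so kexec user-space knows to avoid it: */
    memblock_mark_nomap(base, size);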
Until commit f56ab9a5b73ca ("efi/arm: Don't mark ACPI reclaim memory as MEMBLOCK_NOMAP") the ACPI tables were nomap, now they are reserved and thus possible for kexec to overwrite with the new kernel or initrd. But this was always broken, as the UEFI memory map is also reserved and not marked as nomap. Exporting both nomap and reserved memblock types is a nuisance as they live in different memblock structures which we can't walk at the same time. Take a second walk over memblock.reserved and add new 'reserved' subnodes for the memblock_reserved() regions that aren't already described by the existing code. (e.g. Kernel Code) We use reserve_region_with_split() to find the gaps in existing named regions. This handles the gap between 'kernel code' and 'kernel data' which is memblock_reserve()d, but already partially described by request_standard_resources(). e.g.: | 80000000-dfffffff : System RAM | 80080000-80ffffff : Kernel code | 81000000-8158ffff : reserved | 81590000-8237efff : Kernel data | a0000000-dfffffff : Crash kernel | e00f0000-f949ffff : System RAM reserve_region_with_split needs kzalloc() which isn't available when request_standard_resources() is called, use an initcall. Reported-by: Bhupesh Sharma Reported-by: Tyler Baicar Suggested-by: Akashi Takahiro Signed-off-by: James Morse Fixes: d28f6df1305a ("arm64/kexec: Add core kexec support") Reviewed-by: Ard Biesheuvel CC: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 30ad2f085d1f..5b4fac434c84 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -241,6 +241,44 @@ static void __init request_standard_resources(void) } } +static int __init reserve_memblock_reserved_regions(void) +{ + phys_addr_t start, end, roundup_end = 0; + struct resource *mem, *res; + u64 i; + + for_each_reserved_mem_region(i, &start, &end) { + if (end <= roundup_end) + continue; /* done already */ + + start = __pfn_to_phys(PFN_DOWN(start)); + end = __pfn_to_phys(PFN_UP(end)) - 1; + roundup_end = end; + + res = kzalloc(sizeof(*res), GFP_ATOMIC); + if (WARN_ON(!res)) + return -ENOMEM; + res->start = start; + res->end = end; + res->name = "reserved"; + res->flags = IORESOURCE_MEM; + + mem = request_resource_conflict(&iomem_resource, res); + /* + * We expected memblock_reserve() regions to conflict with + * memory created by request_standard_resources(). + */ + if (WARN_ON_ONCE(!mem)) + continue; + kfree(res); + + reserve_region_with_split(mem, start, end, "reserved"); + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) From 5bcd44083a082f314032969cd6db1eb8275ac77a Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 23 Jul 2018 10:57:29 +0900 Subject: [PATCH 71/90] drivers: acpi: add dependency of EFI for arm64 As Ard suggested, CONFIG_ACPI && !CONFIG_EFI doesn't make sense on arm64, while CONFIG_ACPI and CONFIG_CPU_BIG_ENDIAN doesn't make sense either. As CONFIG_EFI already has a dependency of !CONFIG_CPU_BIG_ENDIAN, it is good enough to add a dependency of CONFIG_EFI to avoid any useless combination of configuration. This bug, reported by Will, will be revealed when my patch series, "arm64: kexec,kdump: fix boot failures on acpi-only system," is applied and the kernel is built under allmodconfig. 
Signed-off-by: AKASHI Takahiro Suggested-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b533eeb6139d..15ab1daaa808 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -6,7 +6,7 @@ menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" depends on !IA64_HP_SIM - depends on IA64 || X86 || ARM64 + depends on IA64 || X86 || (ARM64 && EFI) depends on PCI select PNP default y if (IA64 || X86) From 3ea86495aef2f6de26b7cb1599ba350dd6a0c521 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 23 Jul 2018 10:57:30 +0900 Subject: [PATCH 72/90] efi/arm: preserve early mapping of UEFI memory map longer for BGRT The BGRT code validates the contents of the table against the UEFI memory map, and so it expects it to be mapped when the code runs. On ARM, this is currently not the case, since we tear down the early mapping after efi_init() completes, and only create the permanent mapping in arm_enable_runtime_services(), which executes as an early initcall, but still leaves a window where the UEFI memory map is not mapped. So move the call to efi_memmap_unmap() from efi_init() to arm_enable_runtime_services(). Signed-off-by: Ard Biesheuvel [will: fold in EFI_MEMMAP attribute check from Ard] Signed-off-by: Will Deacon --- drivers/firmware/efi/arm-init.c | 1 - drivers/firmware/efi/arm-runtime.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index b5214c143fee..388a929baf95 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -259,7 +259,6 @@ void __init efi_init(void) reserve_regions(); efi_esrt_init(); - efi_memmap_unmap(); memblock_reserve(params.mmap & PAGE_MASK, PAGE_ALIGN(params.mmap_size + diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 5889cbea60b8..4712445c3213 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -110,11 +110,13 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT)) { + if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { pr_info("EFI services will not be available.\n"); return 0; } + efi_memmap_unmap(); + if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return 0; From 20d12cf990618845e76d3f24daaebd15d65a5e46 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 23 Jul 2018 10:57:31 +0900 Subject: [PATCH 73/90] efi/arm: map UEFI memory map even w/o runtime services enabled Under the current implementation, UEFI memory map will be mapped and made available in virtual mappings only if runtime services are enabled. But in a later patch, we want to use UEFI memory map in acpi_os_ioremap() to create mappings of ACPI tables using memory attributes described in UEFI memory map. See the following commit: arm64: acpi: fix alignment fault in accessing ACPI tables So, as a first step, arm_enter_runtime_services() is modified, alongside Ard's patch[1], so that UEFI memory map will not be freed even if efi=noruntime. 
[1] https://marc.info/?l=linux-efi&m=152930773507524&w=2 Signed-off-by: AKASHI Takahiro Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/firmware/efi/arm-runtime.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 4712445c3213..922cfb813109 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -117,6 +117,13 @@ static int __init arm_enable_runtime_services(void) efi_memmap_unmap(); + mapsize = efi.memmap.desc_size * efi.memmap.nr_map; + + if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) { + pr_err("Failed to remap EFI memory map\n"); + return 0; + } + if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return 0; @@ -129,13 +136,6 @@ static int __init arm_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); - mapsize = efi.memmap.desc_size * efi.memmap.nr_map; - - if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) { - pr_err("Failed to remap EFI memory map\n"); - return -ENOMEM; - } - if (!efi_virtmap_init()) { pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n"); return -ENOMEM; From 09ffcb0d718a0b100f0bed029b830987ecf53fab Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 23 Jul 2018 10:57:32 +0900 Subject: [PATCH 74/90] arm64: acpi: fix alignment fault in accessing ACPI This is a fix against the issue that crash dump kernel may hang up during booting, which can happen on any ACPI-based system with "ACPI Reclaim Memory." (kernel messages after panic kicked off kdump) (snip...) Bye! (snip...) ACPI: Core revision 20170728 pud=000000002e7d0003, *pmd=000000002e7c0003, *pte=00e8000039710707 Internal error: Oops: 96000021 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-rc6 #1 task: ffff000008d05180 task.stack: ffff000008cc0000 PC is at acpi_ns_lookup+0x25c/0x3c0 LR is at acpi_ds_load1_begin_op+0xa4/0x294 (snip...) Process swapper/0 (pid: 0, stack limit = 0xffff000008cc0000) Call trace: (snip...) [] acpi_ns_lookup+0x25c/0x3c0 [] acpi_ds_load1_begin_op+0xa4/0x294 [] acpi_ps_build_named_op+0xc4/0x198 [] acpi_ps_create_op+0x14c/0x270 [] acpi_ps_parse_loop+0x188/0x5c8 [] acpi_ps_parse_aml+0xb0/0x2b8 [] acpi_ns_one_complete_parse+0x144/0x184 [] acpi_ns_parse_table+0x48/0x68 [] acpi_ns_load_table+0x4c/0xdc [] acpi_tb_load_namespace+0xe4/0x264 [] acpi_load_tables+0x48/0xc0 [] acpi_early_init+0x9c/0xd0 [] start_kernel+0x3b4/0x43c Code: b9008fb9 2a000318 36380054 32190318 (b94002c0) ---[ end trace c46ed37f9651c58e ]--- Kernel panic - not syncing: Fatal exception Rebooting in 10 seconds.. (diagnosis) * This fault is a data abort, alignment fault (ESR=0x96000021) during reading out ACPI table. * Initial ACPI tables are normally stored in system ram and marked as "ACPI Reclaim memory" by the firmware. * After the commit f56ab9a5b73c ("efi/arm: Don't mark ACPI reclaim memory as MEMBLOCK_NOMAP"), those regions are differently handled as they are "memblock-reserved", without NOMAP bit. * So they are now excluded from device tree's "usable-memory-range" which kexec-tools determines based on a current view of /proc/iomem. * When crash dump kernel boots up, it tries to accesses ACPI tables by mapping them with ioremap(), not ioremap_cache(), in acpi_os_ioremap() since they are no longer part of mapped system ram. 
* Given that ACPI accessor/helper functions are compiled in without unaligned access support (ACPI_MISALIGNMENT_NOT_SUPPORTED), any unaligned access to ACPI tables can cause a fatal panic. With this patch, acpi_os_ioremap() always honors memory attribute information provided by the firmware (EFI) and retaining cacheability allows the kernel safe access to ACPI tables. Signed-off-by: AKASHI Takahiro Reviewed-by: James Morse Reviewed-by: Ard Biesheuvel Reported-by and Tested-by: Bhupesh Sharma Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 25 +++++++++++++++++-------- arch/arm64/kernel/acpi.c | 11 +++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index d1c290f29b6f..709208dfdc8b 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,10 +12,12 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include #include #include #include +#include #include #include @@ -29,18 +31,22 @@ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI +pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); + /* ACPI table mapping after acpi_permanent_mmap is set */ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - /* - * EFI's reserve_regions() call adds memory with the WB attribute - * to memblock via early_init_dt_add_memory_arch(). - */ - if (!memblock_is_memory(phys)) - return ioremap(phys, size); + /* For normal memory we already have a cacheable mapping. */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__phys_to_virt(phys); - return ioremap_cache(phys, size); + /* + * We should still honor the memory's attribute here because + * crash dump kernel possibly excludes some ACPI (reclaim) + * regions from memblock list. + */ + return __ioremap(phys, size, __acpi_get_mem_attribute(phys)); } #define acpi_os_ioremap acpi_os_ioremap @@ -129,7 +135,10 @@ static inline const char *acpi_get_enable_method(int cpu) * for compatibility. */ #define acpi_disable_cmcff 1 -pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); +static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +{ + return __acpi_get_mem_attribute(addr); +} #endif /* CONFIG_ACPI_APEI */ #ifdef CONFIG_ACPI_NUMA diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7b09487ff8fb..ed46dc188b22 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -29,13 +30,9 @@ #include #include +#include #include -#ifdef CONFIG_ACPI_APEI -# include -# include -#endif - int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); @@ -239,8 +236,7 @@ done: } } -#ifdef CONFIG_ACPI_APEI -pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) { /* * According to "Table 8 Map: EFI memory types to AArch64 memory @@ -261,4 +257,3 @@ pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_NORMAL_NC); return __pgprot(PROT_DEVICE_nGnRnE); } -#endif From b965746306bcb058a6876d3a61b502d3dbb9fe91 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jun 2018 17:17:47 +0100 Subject: [PATCH 75/90] rseq/selftests: Add support for arm64 Hook up arm64 support to the rseq selftests. 
Acked-by: Mathieu Desnoyers Signed-off-by: Will Deacon --- tools/testing/selftests/rseq/param_test.c | 20 + tools/testing/selftests/rseq/rseq-arm64.h | 594 ++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.h | 2 + 3 files changed, 616 insertions(+) create mode 100644 tools/testing/selftests/rseq/rseq-arm64.h diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 615252331813..fa144c556371 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -114,6 +114,26 @@ unsigned int yield_mod_cnt, nr_abort; "bne 222b\n\t" \ "333:\n\t" +#elif defined(__AARCH64EL__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1] "Qo" (loop_cnt[1]) \ + , [loop_cnt_2] "Qo" (loop_cnt[2]) \ + , [loop_cnt_3] "Qo" (loop_cnt[3]) \ + , [loop_cnt_4] "Qo" (loop_cnt[4]) \ + , [loop_cnt_5] "Qo" (loop_cnt[5]) \ + , [loop_cnt_6] "Qo" (loop_cnt[6]) + +#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 + +#define RSEQ_INJECT_ASM(n) \ + " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ + " cbz " INJECT_ASM_REG ", 333f\n" \ + "222:\n" \ + " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ + " cbnz " INJECT_ASM_REG ", 222b\n" \ + "333:\n" + #elif __PPC__ #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h new file mode 100644 index 000000000000..954f34671ca6 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -0,0 +1,594 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-arm64.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers + * (C) Copyright 2018 - Will Deacon + */ + +#define RSEQ_SIG 0xd428bc00 /* BRK #0x45E0 */ + +#define rseq_smp_mb() __asm__ __volatile__ ("dmb ish" ::: "memory") +#define rseq_smp_rmb() __asm__ __volatile__ ("dmb ishld" ::: "memory") +#define rseq_smp_wmb() __asm__ __volatile__ ("dmb ishst" ::: "memory") + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1; \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + } \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u8)v) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u16)v) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u32)v) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" ((__u64)v) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define RSEQ_ASM_TMP_REG32 "w15" +#define RSEQ_ASM_TMP_REG "x15" +#define RSEQ_ASM_TMP_REG_2 "x14" + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + " .pushsection __rseq_table, \"aw\"\n" \ + " .balign 32\n" \ + __rseq_str(label) ":\n" \ + " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ + " .quad " 
__rseq_str(start_ip) ", " \ + __rseq_str(post_commit_offset) ", " \ + __rseq_str(abort_ip) "\n" \ + " .popsection\n" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", :lo12:" __rseq_str(cs_label) "\n" \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ + __rseq_str(label) ":\n" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + " b 222f\n" \ + " .inst " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + " b %l[" __rseq_str(abort_label) "]\n" \ + "222:\n" + +#define RSEQ_ASM_OP_STORE(value, var) \ + " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_STORE_RELEASE(value, var) \ + " stlr %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE_RELEASE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \ + ", %w[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPNE(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label) + +#define RSEQ_ASM_OP_R_LOAD(var) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_STORE(var) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + " ldr " RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(offset) "]]\n" + +#define RSEQ_ASM_OP_R_ADD(count) \ + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(count) "]\n" + +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \ + " cbz %[" __rseq_str(len) "], 333f\n" \ + " mov " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n" \ + "222: sub " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n" \ + " ldrb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " strb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \ + "333:\n" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + 
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expectnot] "r" (expectnot), + [load] "Qo" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + 
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expect] "r" (expect), + [v2] "Qo" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + 
return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index a4684112676c..b5d94087fe31 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -71,6 +71,8 @@ extern __thread volatile struct rseq __rseq_abi; #include #elif defined(__ARMEL__) #include +#elif defined (__AARCH64EL__) +#include #elif defined(__PPC__) #include #elif defined(__mips__) From 2c870e61132c082a03769d2ac0a2849ba33c10e3 
Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Jul 2018 11:48:45 +0200 Subject: [PATCH 76/90] arm64: fix ACPI dependencies Kconfig reports a warning on x86 builds after the ARM64 dependency was added. drivers/acpi/Kconfig:6:error: recursive dependency detected! drivers/acpi/Kconfig:6: symbol ACPI depends on EFI This rephrases the dependency to keep the ARM64 details out of the shared Kconfig file, so Kconfig no longer gets confused by it. For consistency, all three architectures that support ACPI now select ARCH_SUPPORTS_ACPI in exactly the configuration in which they allow it. We still need the 'default x86', as each one wants a different default: default-y on x86, default-n on arm64, and always-y on ia64. Fixes: 5bcd44083a08 ("drivers: acpi: add dependency of EFI for arm64") Reviewed-by: Rafael J. Wysocki Acked-by: Will Deacon Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/x86/Kconfig | 1 + drivers/acpi/Kconfig | 8 +++++--- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e2823362c324..27fd50e86dfe 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1262,6 +1262,7 @@ config EFI bool "UEFI runtime support" depends on OF && !CPU_BIG_ENDIAN depends on KERNEL_MODE_NEON + select ARCH_SUPPORTS_ACPI select LIBFDT select UCS2_STRING select EFI_PARAMS_FROM_FDT diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index ff861420b8f5..02eb68d5d1c0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -16,6 +16,7 @@ config IA64 select ARCH_MIGHT_HAVE_PC_SERIO select PCI if (!IA64_HP_SIM) select ACPI if (!IA64_HP_SIM) + select ARCH_SUPPORTS_ACPI if (!IA64_HP_SIM) select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f1dbb4ee19d7..ee99f5296570 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -75,6 +75,7 @@ config X86 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_USE_BUILTIN_BSWAP diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 15ab1daaa808..4a46344bf0e3 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -5,11 +5,10 @@ menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" - depends on !IA64_HP_SIM - depends on IA64 || X86 || (ARM64 && EFI) + depends on ARCH_SUPPORTS_ACPI depends on PCI select PNP - default y if (IA64 || X86) + default y if X86 help Advanced Configuration and Power Interface (ACPI) support for Linux requires an ACPI-compliant platform (hardware/firmware), @@ -41,6 +40,9 @@ menuconfig ACPI +config ARCH_SUPPORTS_ACPI + bool + if ACPI config ACPI_LEGACY_TABLES_LOOKUP From 06060ea7fb5b6b9b4c3251e0dcabbcca8ad91674 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 19 Jul 2018 19:26:40 +0800 Subject: [PATCH 77/90] drivers/perf: hisi: update the sccl_id/ccl_id when MT is supported MT bit in MPIDR_EL1 is now supported in certain HiSilicon platforms, so the mapping between sccl_id/ccl_id and affinity level needs to be updated from the generic encoding we originally used. 
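To make the new encoding concrete, a small standalone sketch of the MT-case decode, mirroring the hunk below (the helper name is made up): with the MT bit set, Aff2 carries both fields, so for example Aff2 = 0x2b decodes to SCCL 5 and CCL 3.

/*
 * Illustration of the MT decode added below: SCCL_ID is the upper 5 bits
 * of MPIDR_EL1.Aff2, CCL_ID the lower 3 bits.  Aff2 = 0x2b -> SCCL 5, CCL 3.
 */
static void decode_mt_sccl_ccl(u64 mpidr, int *sccl_id, int *ccl_id)
{
	int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);

	*sccl_id = aff2 >> 3;	/* upper 5 bits of Aff2 */
	*ccl_id  = aff2 & 0x7;	/* low 3 bits of Aff2 */
}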
Cc: John Garry Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang [will: fixed comment] Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 44df61397a38..9efd2413240c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -350,19 +350,21 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID - * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is - * in MPIDR[aff1]. If this changes in future, this shall be updated. + * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] + * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. */ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) { u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + if (sccl_id) - *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + *sccl_id = aff2 >> 3; if (ccl_id) - *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + *ccl_id = aff2 & 0x7; } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); From 8a1ccfbc9e0256baafbbce85ccdb72ec89af2aab Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 20 Jul 2018 14:41:53 -0700 Subject: [PATCH 78/90] arm64: Add stack information to on_accessible_stack In preparation for enabling the stackleak plugin on arm64, we need a way to get the bounds of the current stack. Extend on_accessible_stack to get this information. 
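A hypothetical consumer, to show what the extra argument buys: callers can now learn not only whether an SP is on a known stack but also that stack's bounds and type. The STACKLEAK patch later in this series uses exactly this to compute the remaining stack space.

/*
 * Hypothetical helper (not part of the patch): report how much room is
 * left below sp on whatever stack it lives on, using the new
 * struct stack_info out-parameter.
 */
static unsigned long stack_bytes_left(unsigned long sp)
{
	struct stack_info info;

	if (!on_accessible_stack(current, sp, &info))
		return 0;	/* sp is not on any known stack */

	return sp - info.low;	/* info.high and info.type also available */
}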
Acked-by: Alexander Popov Reviewed-by: Mark Rutland Signed-off-by: Laura Abbott [will: folded in fix for allmodconfig build breakage w/ sdei] Signed-off-by: Will Deacon --- arch/arm64/include/asm/sdei.h | 9 ++-- arch/arm64/include/asm/stacktrace.h | 73 ++++++++++++++++++++++++----- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/sdei.c | 51 ++++++++++++++++---- arch/arm64/kernel/stacktrace.c | 2 +- 5 files changed, 111 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index e073e6886685..ffe47d766c25 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -40,15 +40,18 @@ asmlinkage unsigned long __sdei_handler(struct pt_regs *regs, unsigned long sdei_arch_get_entry_point(int conduit); #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) -bool _on_sdei_stack(unsigned long sp); -static inline bool on_sdei_stack(unsigned long sp) +struct stack_info; + +bool _on_sdei_stack(unsigned long sp, struct stack_info *info); +static inline bool on_sdei_stack(unsigned long sp, + struct stack_info *info) { if (!IS_ENABLED(CONFIG_VMAP_STACK)) return false; if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) return false; if (in_nmi()) - return _on_sdei_stack(sp); + return _on_sdei_stack(sp, info); return false; } diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 902f9edacbea..e86737b7c924 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -32,6 +32,21 @@ struct stackframe { #endif }; +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, + STACK_TYPE_IRQ, + STACK_TYPE_OVERFLOW, + STACK_TYPE_SDEI_NORMAL, + STACK_TYPE_SDEI_CRITICAL, +}; + +struct stack_info { + unsigned long low; + unsigned long high; + enum stack_type type; +}; + extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); @@ -39,7 +54,8 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_irq_stack(unsigned long sp) +static inline bool on_irq_stack(unsigned long sp, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; @@ -47,46 +63,79 @@ static inline bool on_irq_stack(unsigned long sp) if (!low) return false; - return (low <= sp && sp < high); + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_IRQ; + } + + return true; } -static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp, + struct stack_info *info) { unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - return (low <= sp && sp < high); + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_TASK; + } + + return true; } #ifdef CONFIG_VMAP_STACK DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); -static inline bool on_overflow_stack(unsigned long sp) +static inline bool on_overflow_stack(unsigned long sp, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - return (low <= sp && sp 
< high); + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_OVERFLOW; + } + + return true; } #else -static inline bool on_overflow_stack(unsigned long sp) { return false; } +static inline bool on_overflow_stack(unsigned long sp, + struct stack_info *info) { return false; } #endif + /* * We can only safely access per-cpu stacks from current in a non-preemptible * context. */ -static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp) +static inline bool on_accessible_stack(struct task_struct *tsk, + unsigned long sp, + struct stack_info *info) { - if (on_task_stack(tsk, sp)) + if (on_task_stack(tsk, sp, info)) return true; if (tsk != current || preemptible()) return false; - if (on_irq_stack(sp)) + if (on_irq_stack(sp, info)) return true; - if (on_overflow_stack(sp)) + if (on_overflow_stack(sp, info)) return true; - if (on_sdei_stack(sp)) + if (on_sdei_stack(sp, info)) return true; return false; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 299758f9b9c4..6219486fa25f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -132,7 +132,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr); + on_irq_stack(addr, NULL); } /** diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 6b8d90d5ceae..a94a868f0532 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -88,23 +89,55 @@ static int init_sdei_stacks(void) return err; } -bool _on_sdei_stack(unsigned long sp) +bool on_sdei_normal_stack(unsigned long sp, + struct stack_info *info) { - unsigned long low, high; + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_NORMAL; + } + + return true; +} + +bool on_sdei_critical_stack(unsigned long sp, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_CRITICAL; + } + + return true; +} + +bool _on_sdei_stack(unsigned long sp, + struct stack_info *info) +{ if (!IS_ENABLED(CONFIG_VMAP_STACK)) return false; - low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); - high = low + SDEI_STACK_SIZE; - - if (low <= sp && sp < high) + if (on_sdei_critical_stack(sp, info)) return true; - low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); - high = low + SDEI_STACK_SIZE; + if (on_sdei_normal_stack(sp, info)) + return true; - return (low <= sp && sp < high); + return false; } unsigned long sdei_arch_get_entry_point(int conduit) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d5718a060672..4989f7ea1e59 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,7 +50,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp)) + if (!on_accessible_stack(tsk, fp, NULL)) return -EINVAL; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); From 
0b3e336601b82c6afa0e9cf21db9cb8793e25399 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 20 Jul 2018 14:41:54 -0700 Subject: [PATCH 79/90] arm64: Add support for STACKLEAK gcc plugin This adds support for the STACKLEAK gcc plugin to arm64 by implementing stackleak_check_alloca(), based heavily on the x86 version, and adding the two helpers used by the stackleak common code: current_top_of_stack() and on_thread_stack(). The stack erasure calls are made at syscall returns. Additionally, this disables the plugin in hypervisor and EFI stub code, which are out of scope for the protection. Acked-by: Alexander Popov Reviewed-by: Mark Rutland Reviewed-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/processor.h | 15 +++++++++++++++ arch/arm64/kernel/entry.S | 3 +++ arch/arm64/kernel/process.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/hyp/Makefile | 3 ++- drivers/firmware/efi/libstub/Makefile | 3 ++- 6 files changed, 45 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 27fd50e86dfe..1494d5cfd681 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,6 +108,7 @@ config ARM64 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e02612105d78..79657ad91397 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -266,5 +266,20 @@ extern void __init minsigstksz_setup(void); #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +/* + * For CONFIG_GCC_PLUGIN_STACKLEAK + * + * These need to be macros because otherwise we get stuck in a nightmare + * of header definitions for the use of task_stack_page. + */ + +#define current_top_of_stack() \ +({ \ + struct stack_info _info; \ + BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info)); \ + _info.high; \ +}) +#define on_thread_stack() (on_task_stack(current, current_stack_pointer, NULL)) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d1440f84668b..09dbea221a27 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -902,6 +902,9 @@ ret_to_user: cbnz x2, work_pending finish_ret_to_user: enable_step_tsk x1, x2 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase +#endif kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 740b31f77ade..7f1628effe6d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -493,3 +493,25 @@ void arch_setup_new_exec(void) { current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; } + +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +void __used stackleak_check_alloca(unsigned long size) +{ + unsigned long stack_left; + unsigned long current_sp = current_stack_pointer; + struct stack_info info; + + BUG_ON(!on_accessible_stack(current, current_sp, &info)); + + stack_left = current_sp - info.low; + + /* + * There's a good chance we're almost out of stack space if this + * is true. Using panic() over BUG() is more likely to give + * reliable debugging output. 
+ */ + if (size >= stack_left) + panic("alloca() over the kernel stack boundary\n"); +} +EXPORT_SYMBOL(stackleak_check_alloca); +#endif diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4313f7475333..2fabc2dc1966 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,7 +3,8 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) KVM=../../../../virt/kvm diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index a34e9290a699..25dd2a14560d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) + $(call cc-option,-fno-stack-protector) \ + $(DISABLE_STACKLEAK_PLUGIN) GCOV_PROFILE := n KASAN_SANITIZE := n From 809092dc3e606f3508b53baa624b27bfff8f0e7f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 26 Jul 2018 12:23:31 +0100 Subject: [PATCH 80/90] drivers/perf: arm-ccn: Use devm_ioremap_resource() to map memory Instead of checking the return value of platform_get_resource(), we can use devm_ioremap_resource() which has the NULL pointer check and the memory region requesting. devm_ioremap_resource is designed to replace calls to devm_request_mem_region followed by devm_ioremap, so let's use the same. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index b416ee18e6bb..4b15c36f4631 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1485,17 +1485,9 @@ static int arm_ccn_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ccn); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EINVAL; - - if (!devm_request_mem_region(ccn->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - ccn->base = devm_ioremap(ccn->dev, res->start, - resource_size(res)); - if (!ccn->base) - return -EFAULT; + ccn->base = devm_ioremap_resource(ccn->dev, res); + if (IS_ERR(ccn->base)) + return PTR_ERR(ccn->base); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) From c5157101e7793b42a56e07368c7f4cb73fb58008 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 12 Jul 2018 00:18:22 +0100 Subject: [PATCH 81/90] arm64: mm: Export __sync_icache_dcache() for xen-privcmd The xen-privcmd driver, which can be modular, calls set_pte_at() which in turn may call __sync_icache_dcache(). The call to __sync_icache_dcache() may be optimised out because it is conditional on !pte_special(), and xen-privcmd calls pte_mkspecial(). But it seems unwise to rely on this optimisation. 
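For reference, the shape of the path the message is describing, paraphrased rather than copied from the arm64 code: the sync sits behind a !pte_special() check, which is why a pte_mkspecial() user such as xen-privcmd may never reach it today, and why exporting the symbol anyway is the safe choice.

/*
 * Paraphrased sketch, not the exact arm64 implementation: the I/D-cache
 * sync is skipped for special PTEs, so whether a modular set_pte_at()
 * caller ever needs __sync_icache_dcache() depends on an optimisation
 * we would rather not rely on.
 */
static inline void set_pte_at_sketch(pte_t *ptep, pte_t pte)
{
	if (pte_present(pte) && !pte_special(pte))
		__sync_icache_dcache(pte);

	set_pte(ptep, pte);
}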
Fixes: 3ad0876554ca ("xen/privcmd: add IOCTL_PRIVCMD_MMAP_RESOURCE") Acked-by: Catalin Marinas Signed-off-by: Ben Hutchings Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 9786f9d5d3dc..30695a868107 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -66,6 +66,7 @@ void __sync_icache_dcache(pte_t pte) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); } +EXPORT_SYMBOL_GPL(__sync_icache_dcache); /* * This function is called when a page has been modified by the kernel. Mark From efd112353bf7c0f9d50f928b449ea9da0ee9554b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Jul 2018 17:43:39 +0100 Subject: [PATCH 82/90] arm64: svc: Ensure hardirq tracing is updated before return We always run userspace with interrupts enabled, but with the recent conversion of the syscall entry/exit code to C, we don't inform the hardirq tracing code that interrupts are about to become enabled by virtue of restoring the EL0 SPSR. This patch ensures that trace_hardirqs_on() is called on the syscall return path when we return to the assembly code with interrupts still disabled. Fixes: f37099b6992a ("arm64: convert syscall trace logic to C") Reported-by: Julien Grall Signed-off-by: Will Deacon --- arch/arm64/kernel/syscall.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index be00c85794db..032d22312881 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -91,8 +91,15 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags)) + if (!has_syscall_work(flags)) { + /* + * We're off to userspace, where interrupts are + * always enabled after we restore the flags from + * the SPSR. + */ + trace_hardirqs_on(); return; + } local_daif_restore(DAIF_PROCCTX); } From 140aada48b5f1a8bed3ba4afb5fc59220651657f Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 30 Jul 2018 10:29:21 -0500 Subject: [PATCH 83/90] arm64: kexec: machine_kexec should call __flush_icache_range machine_kexec flushes the reboot_code_buffer from the icache after stopping the other cpus. Commit 3b8c9f1cdfc5 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings") added an IPI call to flush_icache_range, which causes a hang here, so replace the call with __flush_icache_range Signed-off-by: Dave Kleikamp Cc: AKASHI Takahiro Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index f62effc6e064..e8c02832b7ad 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -184,7 +184,7 @@ void machine_kexec(struct kimage *kimage) /* Flush the reboot_code_buffer in preparation for its execution. */ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reboot_code_buffer, + __flush_icache_range((uintptr_t)reboot_code_buffer, arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. 
*/ From d26de6c9f458f702b778731b7cb578ccd83623c9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 31 Jul 2018 09:35:45 +0200 Subject: [PATCH 84/90] arm64: drop unused kernel_neon_begin_partial() macro When kernel mode NEON was first introduced to the arm64 kernel, every call to kernel_neon_begin()/_end() stacked resp. unstacked the entire NEON register file, making it worthwile to reduce the number of used NEON registers to a bare minimum, and only stack those. kernel_neon_begin_partial() was introduced for this purpose, but after the refactoring for SVE and other changes, it no longer exists and was simply #define'd to kernel_neon_begin() directly. In the mean time, all users have been updated, so let's remove the fallback macro. Reviewed-by: Dave Martin Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/neon.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index f922eaf780f9..fb9d137256a6 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -19,11 +19,4 @@ void kernel_neon_begin(void); void kernel_neon_end(void); -/* - * Temporary macro to allow the crypto code to compile. Note that the - * semantics of kernel_neon_begin_partial() are now different from the - * original as it does not allow being called in an interrupt context. - */ -#define kernel_neon_begin_partial(num_regs) kernel_neon_begin() - #endif /* ! __ASM_NEON_H */ From ce279d374ff315264efcbcef75cc21b9fef8b36f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 30 Jul 2018 11:31:18 -0700 Subject: [PATCH 85/90] efi/libstub: Only disable stackleak plugin for arm64 arm64 uses the full KBUILD_CFLAGS for building libstub as opposed to x86 which doesn't. This means that x86 doesn't pick up the gcc-plugins. We need to disable the stackleak plugin but doing this unconditionally breaks x86 build since it doesn't have any plugins. Switch to disabling the stackleak plugin for arm64 only. Reviewed-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- drivers/firmware/efi/libstub/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 25dd2a14560d..88c322d7c71e 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -11,7 +11,10 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ -mno-mmx -mno-sse -fshort-wchar -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie +# arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly +# disable the stackleak plugin +cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie \ + $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic -mno-single-pic-base @@ -21,7 +24,6 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) \ - $(DISABLE_STACKLEAK_PLUGIN) GCOV_PROFILE := n KASAN_SANITIZE := n From 9d2dcc8fc66087d7fd365e07cd4292adc873e568 Mon Sep 17 00:00:00 2001 From: Michael O'Farrell Date: Mon, 30 Jul 2018 13:14:34 -0700 Subject: [PATCH 86/90] arm64: perf: Add cap_user_time aarch64 It is useful to get the running time of a thread. Doing so in an efficient manner can be important for performance of user applications. 
Avoiding system calls in `clock_gettime` when handling CLOCK_THREAD_CPUTIME_ID is important. Other clocks are handled in the VDSO, but CLOCK_THREAD_CPUTIME_ID falls back on the system call. CLOCK_THREAD_CPUTIME_ID is not handled in the VDSO since it would have costs associated with maintaining updated user space accessible time offsets. These offsets have to be updated everytime the a thread is scheduled/descheduled. However, for programs regularly checking the running time of a thread, this is a performance improvement. This patch takes a middle ground, and adds support for cap_user_time an optional feature of the perf_event API. This way costs are only incurred when the perf_event api is enabled. This is done the same way as it is in x86. Ultimately this allows calculating the thread running time in userspace on aarch64 as follows (adapted from perf_event_open manpage): u32 seq, time_mult, time_shift; u64 running, count, time_offset, quot, rem, delta; struct perf_event_mmap_page *pc; pc = buf; // buf is the perf event mmaped page as documented in the API. if (pc->cap_usr_time) { do { seq = pc->lock; barrier(); running = pc->time_running; count = readCNTVCT_EL0(); // Read ARM hardware clock. time_offset = pc->time_offset; time_mult = pc->time_mult; time_shift = pc->time_shift; barrier(); } while (pc->lock != seq); quot = (count >> time_shift); rem = count & (((u64)1 << time_shift) - 1); delta = time_offset + quot * time_mult + ((rem * time_mult) >> time_shift); running += delta; // running now has the current nanosecond level thread time. } Summary of changes in the patch: For aarch64 systems, make arch_perf_update_userpage update the timing information stored in the perf_event page. Requiring the following calculations: - Calculate the appropriate time_mult, and time_shift factors to convert ticks to nano seconds for the current clock frequency. - Adjust the mult and shift factors to avoid shift factors of 32 bits. (possibly unnecessary) - The time_offset userspace should apply when doing calculations: negative the current sched time (now), because time_running and time_enabled fields of the perf_event page have just been updated. Toggle bits to appropriate values: - Enable cap_user_time Signed-off-by: Michael O'Farrell Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 30 ++++++++++++++++++++++++++++++ kernel/events/core.c | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index dfff5ed5c625..8e38d5267f22 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -1278,3 +1279,32 @@ static int __init armv8_pmu_driver_init(void) return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + u32 freq; + u32 shift; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + freq = arch_timer_get_rate(); + userpg->cap_user_time = 1; + + clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, + NSEC_PER_SEC, 0); + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. 
+ */ + if (shift == 32) { + shift = 31; + userpg->time_mult >>= 1; + } + userpg->time_shift = (u16)shift; + userpg->time_offset = -now; +} diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f0434a9951a..1a16dcca0da2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5246,8 +5246,8 @@ void perf_event_update_userpage(struct perf_event *event) userpg = rb->user_page; /* - * Disable preemption so as to not let the corresponding user-space - * spin too long if we get preempted. + * Disable preemption to guarantee consistent time stamps are stored to + * the user page. */ preempt_disable(); ++userpg->lock; From e401b7c2c69008ad2fcdc154f7c5421281c90042 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 30 Jul 2018 11:54:43 +0530 Subject: [PATCH 87/90] arm64, kaslr: export offset in VMCOREINFO ELF notes Include KASLR offset in arm64 VMCOREINFO ELF notes to assist in debugging. vmcore parsing in user-space already expects this value in the notes and we are providing it for portability of those existing tools with x86. Ideally we would like core code to do this (so that way this information won't be missed when an architecture adds KASLR support), but mips has CONFIG_RANDOMIZE_BASE, and doesn't provide kaslr_offset(), so I am not sure if this is needed for mips (and other such similar arch cases in future). So, lets keep this architecture specific for now. As an example of a user-space use-case, consider the makedumpfile user-space utility which will need fixup to use this KASLR offset to work with cases where we need to find a way to translate symbol address from vmlinux to kernel run time address in case of KASLR boot on arm64. I have already submitted the makedumpfile user-space patch upstream and the maintainer has suggested to wait for the kernel changes to be included (see [0]). I tested this on my qualcomm amberwing board both for KASLR and non-KASLR boot cases: Without this patch: # cat > scrub.conf << EOF [vmlinux] erase jiffies erase init_task.utime for tsk in init_task.tasks.next within task_struct:tasks erase tsk.utime endfor EOF # makedumpfile --split -d 31 -x vmlinux --config scrub.conf vmcore dumpfile_{1,2,3} readpage_elf: Attempt to read non-existent page at 0xffffa8a5bf180000. readmem: type_addr: 1, addr:ffffa8a5bf180000, size:8 vaddr_to_paddr_arm64: Can't read pgd readmem: Can't convert a virtual address(ffff0000092a542c) to physical address. readmem: type_addr: 0, addr:ffff0000092a542c, size:390 check_release: Can't get the address of system_utsname After this patch check_release() is ok, and also we are able to erase symbol from vmcore (I checked this with kernel 4.18.0-rc4+): # makedumpfile --split -d 31 -x vmlinux --config scrub.conf vmcore dumpfile_{1,2,3} The kernel version is not supported. The makedumpfile operation may be incomplete. Checking for memory holes : [100.0 %] \ Checking for memory holes : [100.0 %] | Checking foExcluding unnecessary pages : [100.0 %] \ Excluding unnecessary pages : [100.0 %] \ The dumpfiles are saved to dumpfile_1, dumpfile_2, and dumpfile_3. makedumpfile Completed. 
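The consumer side is a one-line translation; a hypothetical sketch (not makedumpfile's actual code) of what the exported value enables:

/*
 * Illustration only: with KERNELOFFSET available in VMCOREINFO, a
 * vmcore consumer maps a vmlinux (link-time) symbol address to the
 * KASLR-slid run-time address by adding the offset.
 */
static unsigned long long vmlinux_to_runtime(unsigned long long vmlinux_addr,
					     unsigned long long kernel_offset)
{
	return vmlinux_addr + kernel_offset;
}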
[0] https://www.spinics.net/lists/kexec/msg21195.html Cc: Ard Biesheuvel Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Cc: James Morse Acked-by: James Morse Signed-off-by: Bhupesh Sharma Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index e8c02832b7ad..4c0eb30bfede 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -360,4 +360,5 @@ void arch_crash_save_vmcoreinfo(void) kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } From eab1cecc1223378fee700ab6acebf0f39fa6da81 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Jul 2018 12:02:18 +0100 Subject: [PATCH 88/90] arm64: sdei: Mark sdei stack helper functions as static The SDEI stack helper functions are only used by _on_sdei_stack() and refer to symbols (e.g. sdei_stack_normal_ptr) that are only defined if CONFIG_VMAP_STACK=y. Mark these functions as static, so we don't run into errors at link-time due to references to undefined symbols. Stick all the parameters onto the same line whilst we're passing through. Signed-off-by: Will Deacon --- arch/arm64/kernel/sdei.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index a94a868f0532..5ba4465e44f0 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -89,8 +89,7 @@ static int init_sdei_stacks(void) return err; } -bool on_sdei_normal_stack(unsigned long sp, - struct stack_info *info) +static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; @@ -107,8 +106,7 @@ bool on_sdei_normal_stack(unsigned long sp, return true; } -bool on_sdei_critical_stack(unsigned long sp, - struct stack_info *info) +static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; @@ -125,8 +123,7 @@ bool on_sdei_critical_stack(unsigned long sp, return true; } -bool _on_sdei_stack(unsigned long sp, - struct stack_info *info) +bool _on_sdei_stack(unsigned long sp, struct stack_info *info) { if (!IS_ENABLED(CONFIG_VMAP_STACK)) return false; From dcab90d90935f990407c86b671a7f1ac285d106c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Jul 2018 12:09:03 +0100 Subject: [PATCH 89/90] arm64: kexec: Add comment to explain use of __flush_icache_range() Now that we understand the deadlock arising from flush_icache_range() on the kexec crash kernel path, add a comment to justify the use of __flush_icache_range() here. Reported-by: Dave Kleikamp Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 4c0eb30bfede..f6a5c6bc1434 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -184,8 +184,15 @@ void machine_kexec(struct kimage *kimage) /* Flush the reboot_code_buffer in preparation for its execution. 
*/ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); + + /* + * Although we've killed off the secondary CPUs, we don't update + * the online mask if we're handling a crash kernel and consequently + * need to avoid flush_icache_range(), which will attempt to IPI + * the offline CPUs. Therefore, we must use the __* variant here. + */ __flush_icache_range((uintptr_t)reboot_code_buffer, - arm64_relocate_new_kernel_size); + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ kexec_list_flush(kimage); From 3c4d9137eefecf273a520d392071ffc9df0a9a7a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Aug 2018 18:59:57 -0500 Subject: [PATCH 90/90] arm64: alternative: Use true and false for boolean values Return statements in functions returning bool should use true or false instead of an integer value. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 36fb069fd049..b5d603992d40 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -47,11 +47,11 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) unsigned long replptr; if (kernel_text_address(pc)) - return 1; + return true; replptr = (unsigned long)ALT_REPL_PTR(alt); if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return 0; + return false; /* * Branching into *another* alternate sequence is doomed, and