From bb4a23c994aebcd96c567a0be8e964d516bd4a61 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:19 +0000 Subject: [PATCH 1/6] riscv/vdso: Refactor asm/vdso.h The asm/vdso.h will be included in vdso.lds.S in the next patch, the following cleanup is needed to avoid syntax error: 1.the declaration of sys_riscv_flush_icache() is moved into asm/syscall.h. 2.the definition of struct vdso_data is moved into kernel/vdso.c. 2.the definition of VDSO_SYMBOL is placed under "#ifndef __ASSEMBLY__". Also remove the redundant linux/types.h include. Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/syscall.h | 1 + arch/riscv/include/asm/vdso.h | 16 ++++++++++------ arch/riscv/kernel/syscall_table.c | 1 - arch/riscv/kernel/vdso.c | 5 ++++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index b933b1583c9f..34fbb3ea21d5 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -82,4 +82,5 @@ static inline int syscall_get_arch(struct task_struct *task) #endif } +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); #endif /* _ASM_RISCV_SYSCALL_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 893e47195e30..a4a979c89ea0 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -16,18 +16,22 @@ #ifdef CONFIG_MMU #include -#include +/* + * All systems with an MMU have a VDSO, but systems without an MMU don't + * support shared libraries and therefor don't have one. + */ +#ifdef CONFIG_MMU -#ifndef CONFIG_GENERIC_TIME_VSYSCALL -struct vdso_data { -}; -#endif +#ifndef __ASSEMBLY__ +#include #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) #endif /* CONFIG_MMU */ -asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index a63c667c27b3..44b1420a2270 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #undef __SYSCALL diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 25a3b8849599..72e93d218335 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,10 +12,13 @@ #include #include #include +#include + #ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else -#include +struct vdso_data { +}; #endif extern char vdso_start[], vdso_end[]; From 78a743cd82a35ca0724179fc22834f06a2151fc2 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:20 +0000 Subject: [PATCH 2/6] riscv/vdso: Move vdso data page up front As commit 601255ae3c98 ("arm64: vdso: move data page before code pages"), the same issue exists on riscv, testcase is shown below, make sure that vdso.so is bigger than page size, struct timespec tp; clock_gettime(5, &tp); printf("tv_sec: %ld, tv_nsec: %ld\n", tp.tv_sec, tp.tv_nsec); without this patch, test result : tv_sec: 0, tv_nsec: 0 with this patch, test result : tv_sec: 1629271537, tv_nsec: 748000000 Move the vdso data page in front of the VDSO area to fix the issue. Fixes: ad5d1122b82fb ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 ++ arch/riscv/kernel/vdso.c | 48 ++++++++++++++++++------------- arch/riscv/kernel/vdso/vdso.lds.S | 3 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index a4a979c89ea0..208e31bc5d1c 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -22,6 +22,8 @@ */ #ifdef CONFIG_MMU +#define __VVAR_PAGES 1 + #ifndef __ASSEMBLY__ #include diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 72e93d218335..e7bd92d8749b 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -23,6 +23,13 @@ struct vdso_data { extern char vdso_start[], vdso_end[]; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + +#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) + static unsigned int vdso_pages __ro_after_init; static struct page **vdso_pagelist __ro_after_init; @@ -41,7 +48,7 @@ static int __init vdso_init(void) vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; vdso_pagelist = - kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); + kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL); if (unlikely(vdso_pagelist == NULL)) { pr_err("vdso: pagelist allocation failed\n"); return -ENOMEM; @@ -66,7 +73,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, unsigned long vdso_base, vdso_len; int ret; - vdso_len = (vdso_pages + 1) << PAGE_SHIFT; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + + vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; mmap_write_lock(mm); vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); @@ -75,29 +84,28 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, goto end; } + mm->context.vdso = NULL; + ret = install_special_mapping(mm, vdso_base, VVAR_SIZE, + (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); + if (unlikely(ret)) + goto end; + + ret = + install_special_mapping(mm, vdso_base + VVAR_SIZE, + vdso_pages << PAGE_SHIFT, + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + vdso_pagelist); + + if (unlikely(ret)) + goto end; + /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO (since arch_vma_name fails). */ - mm->context.vdso = (void *)vdso_base; + mm->context.vdso = (void *)vdso_base + VVAR_SIZE; - ret = - install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), - vdso_pagelist); - - if (unlikely(ret)) { - mm->context.vdso = NULL; - goto end; - } - - vdso_base += (vdso_pages << PAGE_SHIFT); - ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, - (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); - - if (unlikely(ret)) - mm->context.vdso = NULL; end: mmap_write_unlock(mm); return ret; @@ -108,7 +116,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) return "[vdso]"; if (vma->vm_mm && (vma->vm_start == - (long)vma->vm_mm->context.vdso + PAGE_SIZE)) + (long)vma->vm_mm->context.vdso - VVAR_SIZE)) return "[vdso_data]"; return NULL; } diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index e6f558bca71b..e9111f700af0 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -3,12 +3,13 @@ * Copyright (C) 2012 Regents of the University of California */ #include +#include OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . + PAGE_SIZE); + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From 8bb0ab3ae7a4dbe6cf32deb830cf2bdbf5736867 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 02:46:21 +0000 Subject: [PATCH 3/6] riscv/vdso: make arch_setup_additional_pages wait for mmap_sem for write killable riscv architectures relying on mmap_sem for write in their arch_setup_additional_pages. If the waiting task gets killed by the oom killer it would block oom_reaper from asynchronous address space reclaim and reduce the chances of timely OOM resolving. Wait for the lock in the killable mode and return with EINTR if the task got killed while waiting. Signed-off-by: Tong Tiangen Reviewed-by: Kefeng Wang Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e7bd92d8749b..b70956d80408 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -77,7 +77,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; - mmap_write_lock(mm); + if (mmap_write_lock_killable(mm)) + return -EINTR; + vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { ret = vdso_base; From 59a4e0d5511ba61353ea9a4efdb1b86c23ecf134 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Sat, 2 Oct 2021 17:21:20 -0700 Subject: [PATCH 4/6] RISC-V: Include clone3() on rv32 As far as I can tell this should be enabled on rv32 as well, I'm not sure why it's rv64-only. checksyscalls is complaining about our lack of clone3() on rv32. Fixes: 56ac5e213933 ("riscv: enable sys_clone3 syscall for rv64") Signed-off-by: Palmer Dabbelt Reviewed-by: Arnd Bergmann Acked-by: Christian Brauner Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/unistd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 4b989ae15d59..8062996c2dfd 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,9 +18,10 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ +#define __ARCH_WANT_SYS_CLONE3 + #include /* From bb8958d5dc79acbd071397abb57b8756375fe1ce Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sat, 18 Sep 2021 18:02:21 +0200 Subject: [PATCH 5/6] riscv: Flush current cpu icache before other cpus On SiFive Unmatched, I recently fell onto the following BUG when booting: [ 0.000000] ftrace: allocating 36610 entries in 144 pages [ 0.000000] Oops - illegal instruction [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.1+ #5 [ 0.000000] Hardware name: SiFive HiFive Unmatched A00 (DT) [ 0.000000] epc : riscv_cpuid_to_hartid_mask+0x6/0xae [ 0.000000] ra : __sbi_rfence_v02+0xc8/0x10a [ 0.000000] epc : ffffffff80007240 ra : ffffffff80009964 sp : ffffffff81803e10 [ 0.000000] gp : ffffffff81a1ea70 tp : ffffffff8180f500 t0 : ffffffe07fe30000 [ 0.000000] t1 : 0000000000000004 t2 : 0000000000000000 s0 : ffffffff81803e60 [ 0.000000] s1 : 0000000000000000 a0 : ffffffff81a22238 a1 : ffffffff81803e10 [ 0.000000] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 [ 0.000000] a5 : 0000000000000000 a6 : ffffffff8000989c a7 : 0000000052464e43 [ 0.000000] s2 : ffffffff81a220c8 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.000000] s5 : 0000000000000000 s6 : 0000000200000100 s7 : 0000000000000001 [ 0.000000] s8 : ffffffe07fe04040 s9 : ffffffff81a22c80 s10: 0000000000001000 [ 0.000000] s11: 0000000000000004 t3 : 0000000000000001 t4 : 0000000000000008 [ 0.000000] t5 : ffffffcf04000808 t6 : ffffffe3ffddf188 [ 0.000000] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000002 [ 0.000000] [] riscv_cpuid_to_hartid_mask+0x6/0xae [ 0.000000] [] sbi_remote_fence_i+0x1e/0x26 [ 0.000000] [] flush_icache_all+0x12/0x1a [ 0.000000] [] patch_text_nosync+0x26/0x32 [ 0.000000] [] ftrace_init_nop+0x52/0x8c [ 0.000000] [] ftrace_process_locs.isra.0+0x29c/0x360 [ 0.000000] [] ftrace_init+0x80/0x130 [ 0.000000] [] start_kernel+0x5c4/0x8f6 [ 0.000000] ---[ end trace f67eb9af4d8d492b ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- While ftrace is looping over a list of addresses to patch, it always failed when patching the same function: riscv_cpuid_to_hartid_mask. Looking at the backtrace, the illegal instruction is encountered in this same function. However, patch_text_nosync, after patching the instructions, calls flush_icache_range. But looking at what happens in this function: flush_icache_range -> flush_icache_all -> sbi_remote_fence_i -> __sbi_rfence_v02 -> riscv_cpuid_to_hartid_mask The icache and dcache of the current cpu are never synchronized between the patching of riscv_cpuid_to_hartid_mask and calling this same function. So fix this by flushing the current cpu's icache before asking for the other cpus to do the same. Signed-off-by: Alexandre Ghiti Fixes: fab957c11efe ("RISC-V: Atomic and Locking Code") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 094118663285..89f81067e09e 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -16,6 +16,8 @@ static void ipi_remote_fence_i(void *info) void flush_icache_all(void) { + local_flush_icache_all(); + if (IS_ENABLED(CONFIG_RISCV_SBI)) sbi_remote_fence_i(NULL); else From 3ef6ca4f354c53abf263cbeb51e7272523c294d8 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Sat, 2 Oct 2021 16:57:13 -0700 Subject: [PATCH 6/6] checksyscalls: Unconditionally ignore fstat{,at}64 These can be replaced by statx(). Since rv32 has a 64-bit time_t we just never ended up with them in the first place. This is now an error due to -Werror. Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Palmer Dabbelt --- scripts/checksyscalls.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index fd9777f63f14..9dbab13329fa 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -82,10 +82,8 @@ cat << EOF #define __IGNORE_truncate64 #define __IGNORE_stat64 #define __IGNORE_lstat64 -#define __IGNORE_fstat64 #define __IGNORE_fcntl64 #define __IGNORE_fadvise64_64 -#define __IGNORE_fstatat64 #define __IGNORE_fstatfs64 #define __IGNORE_statfs64 #define __IGNORE_llseek @@ -253,6 +251,10 @@ cat << EOF #define __IGNORE_getpmsg #define __IGNORE_putpmsg #define __IGNORE_vserver + +/* 64-bit ports never needed these, and new 32-bit ports can use statx */ +#define __IGNORE_fstat64 +#define __IGNORE_fstatat64 EOF }