From 24fc18087f4237d98d892280abe97711e0f4bc9e Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 15 May 2023 11:19:08 +0530 Subject: [PATCH 01/78] riscv: move sbi_init() earlier before jump_label_init() We call jump_label_init() in setup_arch() is to use static key mechanism earlier, but riscv jump label relies on the sbi functions, If we enable static key before sbi_init(), the code path looks like: static_branch_enable() .. arch_jump_label_transform() patch_text_nosync() flush_icache_range() flush_icache_all() sbi_remote_fence_i() for CONFIG_RISCV_SBI case __sbi_rfence() Since sbi isn't initialized, so NULL deference! Here is a typical panic log: [ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 0.000000] Oops [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.18.0-rc7+ #79 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] epc : 0x0 [ 0.000000] ra : sbi_remote_fence_i+0x1e/0x26 [ 0.000000] epc : 0000000000000000 ra : ffffffff80005826 sp : ffffffff80c03d50 [ 0.000000] gp : ffffffff80ca6178 tp : ffffffff80c0ad80 t0 : 6200000000000000 [ 0.000000] t1 : 0000000000000000 t2 : 62203a6b746e6972 s0 : ffffffff80c03d60 [ 0.000000] s1 : ffffffff80001af6 a0 : 0000000000000000 a1 : 0000000000000000 [ 0.000000] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 [ 0.000000] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000080200 [ 0.000000] s2 : ffffffff808b3e48 s3 : ffffffff808bf698 s4 : ffffffff80cb2818 [ 0.000000] s5 : 0000000000000001 s6 : ffffffff80c9c345 s7 : ffffffff80895aa0 [ 0.000000] s8 : 0000000000000001 s9 : 000000000000007f s10: 0000000000000000 [ 0.000000] s11: 0000000000000000 t3 : ffffffff80824d08 t4 : 0000000000000022 [ 0.000000] t5 : 000000000000003d t6 : 0000000000000000 [ 0.000000] status: 0000000000000100 badaddr: 0000000000000000 cause: 000000000000000c [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- Fix this issue by moving sbi_init() earlier before jump_label_init() Signed-off-by: Jisheng Zhang Reviewed-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20230515054928.2079268-2-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 36b026057503..9fb839074e16 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -270,6 +270,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; early_ioremap_setup(); + sbi_init(); jump_label_init(); parse_early_param(); @@ -283,7 +284,6 @@ void __init setup_arch(char **cmdline_p) misc_mem_init(); init_resources(); - sbi_init(); #ifdef CONFIG_KASAN kasan_init(); From 7f2e20459b281449b0228338d0dd5b044bc55eb6 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:09 +0530 Subject: [PATCH 02/78] platform/surface: Disable for RISC-V With CONFIG_ACPI enabled for RISC-V, this driver gets enabled in allmodconfig build. However, RISC-V doesn't support sub-word atomics which is used by this driver and hence allmodconfig build will fail. There is currently no plan to support this driver for RISC-V. So, disable this driver for RISC-V even when ACPI is enabled. Signed-off-by: Sunil V L Acked-by: Maximilian Luz Link: https://lore.kernel.org/r/20230515054928.2079268-3-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/platform/surface/aggregator/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/Kconfig b/drivers/platform/surface/aggregator/Kconfig index c114f9dd5fe1..88afc38ffdc5 100644 --- a/drivers/platform/surface/aggregator/Kconfig +++ b/drivers/platform/surface/aggregator/Kconfig @@ -4,7 +4,7 @@ menuconfig SURFACE_AGGREGATOR tristate "Microsoft Surface System Aggregator Module Subsystem and Drivers" depends on SERIAL_DEV_BUS - depends on ACPI + depends on ACPI && !RISCV select CRC_CCITT help The Surface System Aggregator Module (Surface SAM or SSAM) is an From fbb995a7b27c72d83963bf43ed76dcaf0449a2a9 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:10 +0530 Subject: [PATCH 03/78] crypto: hisilicon/qm: Fix to enable build with RISC-V clang With CONFIG_ACPI enabled for RISC-V, this driver gets enabled in allmodconfig build. However, build fails with clang and below error is seen. drivers/crypto/hisilicon/qm.c:627:10: error: invalid output constraint '+Q' in asm "+Q" (*((char __iomem *)fun_base)) ^ This is expected error with clang due to the way it is designed. To fix this issue, move arm64 assembly code under #if. Link: https://github.com/ClangBuiltLinux/linux/issues/999 Signed-off-by: Nathan Chancellor [sunilvl@ventanamicro.com: Moved tmp0 and tmp1 into the #if] Signed-off-by: Sunil V L Acked-by: Herbert Xu Link: https://lore.kernel.org/r/20230515054928.2079268-4-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/crypto/hisilicon/qm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index ad0c042b5e66..edc6fd44e7ca 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -610,7 +610,10 @@ EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready); static void qm_mb_write(struct hisi_qm *qm, const void *src) { void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE; + +#if IS_ENABLED(CONFIG_ARM64) unsigned long tmp0 = 0, tmp1 = 0; +#endif if (!IS_ENABLED(CONFIG_ARM64)) { memcpy_toio(fun_base, src, 16); @@ -618,6 +621,7 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) return; } +#if IS_ENABLED(CONFIG_ARM64) asm volatile("ldp %0, %1, %3\n" "stp %0, %1, %2\n" "dmb oshst\n" @@ -626,6 +630,7 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) "+Q" (*((char __iomem *)fun_base)) : "Q" (*((char *)src)) : "memory"); +#endif } static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) From 4d02d88d2b922807307a3574a7b401dcccb870d1 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:11 +0530 Subject: [PATCH 04/78] ACPI: tables: Print RINTC information when MADT is parsed When MADT is parsed, print RINTC information as below: ACPI: RISC-V INTC (acpi_uid[0x0000] hart_id[0x0] enabled) ACPI: RISC-V INTC (acpi_uid[0x0001] hart_id[0x1] enabled) ... ACPI: RISC-V INTC (acpi_uid[0x000f] hart_id[0xf] enabled) This debug information will be very helpful during bring up. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230515054928.2079268-5-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/acpi/tables.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 7b4680da57d7..8ab0a82b4da4 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -220,6 +220,16 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } break; + case ACPI_MADT_TYPE_RINTC: + { + struct acpi_madt_rintc *p = (struct acpi_madt_rintc *)header; + + pr_debug("RISC-V INTC (acpi_uid[0x%04x] hart_id[0x%llx] %s)\n", + p->uid, p->hart_id, + (p->flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled"); + } + break; + default: pr_warn("Found unsupported MADT entry (type = 0x%x)\n", header->type); From 214c236223b8449177a7e4a4c49dd65892f6cd59 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:12 +0530 Subject: [PATCH 05/78] ACPI: OSL: Make should_use_kmap() 0 for RISC-V Without this, if the tables are larger than 4K, acpi_map() will fail. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230515054928.2079268-6-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/acpi/osl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 3269a888fb7a..f725813d0cce 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -276,7 +276,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) return NULL; } -#if defined(CONFIG_IA64) || defined(CONFIG_ARM64) +#if defined(CONFIG_IA64) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) /* ioremap will take care of cache attributes */ #define should_use_kmap(pfn) 0 #else From a91a9ffbd3a55a0ae1bb75e2b6e85b2a03f64e8f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:13 +0530 Subject: [PATCH 06/78] RISC-V: Add support to build the ACPI core Enable ACPI core for RISC-V after adding architecture-specific interfaces and header files required to build the ACPI core. 1) Couple of header files are required unconditionally by the ACPI core. Add empty acenv.h and cpu.h header files. 2) If CONFIG_PCI is enabled, a few PCI related interfaces need to be provided by the architecture. Define dummy interfaces for now so that build succeeds. Actual implementation will be added when PCI support is added for ACPI along with external interrupt controller support. 3) A few globals and memory mapping related functions specific to the architecture need to be provided. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230515054928.2079268-7-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 +++ arch/riscv/include/asm/acenv.h | 11 +++++ arch/riscv/include/asm/acpi.h | 61 ++++++++++++++++++++++++++ arch/riscv/include/asm/cpu.h | 8 ++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/acpi.c | 80 ++++++++++++++++++++++++++++++++++ 6 files changed, 166 insertions(+) create mode 100644 arch/riscv/include/asm/acenv.h create mode 100644 arch/riscv/include/asm/acpi.h create mode 100644 arch/riscv/include/asm/cpu.h create mode 100644 arch/riscv/kernel/acpi.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..491ecd7d2336 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,6 +12,8 @@ config 32BIT config RISCV def_bool y + select ACPI_GENERIC_GSI if ACPI + select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 @@ -707,6 +709,7 @@ config EFI depends on OF && !XIP_KERNEL depends on MMU default y + select ARCH_SUPPORTS_ACPI if 64BIT select EFI_GENERIC_STUB select EFI_PARAMS_FROM_FDT select EFI_RUNTIME_WRAPPERS @@ -816,3 +819,5 @@ source "drivers/cpufreq/Kconfig" endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" + +source "drivers/acpi/Kconfig" diff --git a/arch/riscv/include/asm/acenv.h b/arch/riscv/include/asm/acenv.h new file mode 100644 index 000000000000..43ae2e32c779 --- /dev/null +++ b/arch/riscv/include/asm/acenv.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * RISC-V specific ACPICA environments and implementation + */ + +#ifndef _ASM_ACENV_H +#define _ASM_ACENV_H + +/* This header is required unconditionally by the ACPI core */ + +#endif /* _ASM_ACENV_H */ diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h new file mode 100644 index 000000000000..bcade255bd6e --- /dev/null +++ b/arch/riscv/include/asm/acpi.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013-2014, Linaro Ltd. + * Author: Al Stone + * Author: Graeme Gregory + * Author: Hanjun Guo + * + * Copyright (C) 2021-2023, Ventana Micro Systems Inc. + * Author: Sunil V L + */ + +#ifndef _ASM_ACPI_H +#define _ASM_ACPI_H + +/* Basic configuration for ACPI */ +#ifdef CONFIG_ACPI + +/* ACPI table mapping after acpi_permanent_mmap is set */ +void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +#define acpi_os_ioremap acpi_os_ioremap + +#define acpi_strict 1 /* No out-of-spec workarounds on RISC-V */ +extern int acpi_disabled; +extern int acpi_noirq; +extern int acpi_pci_disabled; + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +static inline void enable_acpi(void) +{ + acpi_disabled = 0; + acpi_pci_disabled = 0; + acpi_noirq = 0; +} + +/* + * The ACPI processor driver for ACPI core code needs this macro + * to find out whether this cpu was already mapped (mapping from CPU hardware + * ID to CPU logical ID) or not. + */ +#define cpu_physical_id(cpu) cpuid_to_hartid_map(cpu) + +/* + * Since MADT must provide at least one RINTC structure, the + * CPU will be always available in MADT on RISC-V. + */ +static inline bool acpi_has_cpu_in_madt(void) +{ + return true; +} + +static inline void arch_fix_phys_package_id(int num, u32 slot) { } + +#endif /* CONFIG_ACPI */ + +#endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/cpu.h b/arch/riscv/include/asm/cpu.h new file mode 100644 index 000000000000..28d45a6678ce --- /dev/null +++ b/arch/riscv/include/asm/cpu.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_CPU_H +#define _ASM_CPU_H + +/* This header is required unconditionally by the ACPI core */ + +#endif /* _ASM_CPU_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fbdccc21418a..ed5fcd90036e 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -92,3 +92,4 @@ obj-$(CONFIG_COMPAT) += compat_signal.o obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ +obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c new file mode 100644 index 000000000000..81d448c41714 --- /dev/null +++ b/arch/riscv/kernel/acpi.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RISC-V Specific Low-Level ACPI Boot Support + * + * Copyright (C) 2013-2014, Linaro Ltd. + * Author: Al Stone + * Author: Graeme Gregory + * Author: Hanjun Guo + * Author: Tomasz Nowicki + * Author: Naresh Bhat + * + * Copyright (C) 2021-2023, Ventana Micro Systems Inc. + * Author: Sunil V L + */ + +#include +#include +#include + +int acpi_noirq = 1; /* skip ACPI IRQ initialization */ +int acpi_disabled = 1; +EXPORT_SYMBOL(acpi_disabled); + +int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ +EXPORT_SYMBOL(acpi_pci_disabled); + +/* + * __acpi_map_table() will be called before paging_init(), so early_ioremap() + * or early_memremap() should be called here to for ACPI table mapping. + */ +void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) +{ + if (!size) + return NULL; + + return early_memremap(phys, size); +} + +void __init __acpi_unmap_table(void __iomem *map, unsigned long size) +{ + if (!map || !size) + return; + + early_memunmap(map, size); +} + +void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + return memremap(phys, size, MEMREMAP_WB); +} + +#ifdef CONFIG_PCI + +/* + * These interfaces are defined just to enable building ACPI core. + * TODO: Update it with actual implementation when external interrupt + * controller support is added in RISC-V ACPI. + */ +int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +{ + return -1; +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + return NULL; +} +#endif /* CONFIG_PCI */ From 8b7809e289524e02f8f0755ca632ea9e9aefbd0e Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:14 +0530 Subject: [PATCH 07/78] ACPI: processor_core: RISC-V: Enable mapping processor to the hartid processor_core needs arch-specific functions to map the ACPI ID to the physical ID. In RISC-V platforms, hartid is the physical id and RINTC structure in MADT provides this mapping. Add arch-specific function to get this mapping from RINTC. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230515054928.2079268-8-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/acpi.h | 3 +++ drivers/acpi/processor_core.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index bcade255bd6e..9be52b6ffae1 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -15,6 +15,9 @@ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI +typedef u64 phys_cpuid_t; +#define PHYS_CPUID_INVALID INVALID_HARTID + /* ACPI table mapping after acpi_permanent_mmap is set */ void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 2ac48cda5b20..d6606a9f2da6 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -106,6 +106,32 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, return -EINVAL; } +/* + * Retrieve the RISC-V hartid for the processor + */ +static int map_rintc_hartid(struct acpi_subtable_header *entry, + int device_declaration, u32 acpi_id, + phys_cpuid_t *hartid) +{ + struct acpi_madt_rintc *rintc = + container_of(entry, struct acpi_madt_rintc, header); + + if (!(rintc->flags & ACPI_MADT_ENABLED)) + return -ENODEV; + + /* device_declaration means Device object in DSDT, in the + * RISC-V, logical processors are required to + * have a Processor Device object in the DSDT, so we should + * check device_declaration here + */ + if (device_declaration && rintc->uid == acpi_id) { + *hartid = rintc->hart_id; + return 0; + } + + return -EINVAL; +} + static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, int type, u32 acpi_id) { @@ -136,6 +162,9 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { if (!map_gicc_mpidr(header, type, acpi_id, &phys_id)) break; + } else if (header->type == ACPI_MADT_TYPE_RINTC) { + if (!map_rintc_hartid(header, type, acpi_id, &phys_id)) + break; } entry += header->length; } From 724f4c0df7665a1bb9cb105a20131dfca5c032dd Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:15 +0530 Subject: [PATCH 08/78] RISC-V: Add ACPI initialization in setup_arch() Initialize the ACPI core for RISC-V during boot. ACPI tables and interpreter are initialized based on the information passed from the firmware and the value of the kernel parameter 'acpi'. With ACPI support added for RISC-V, the kernel parameter 'acpi' is also supported on RISC-V. Hence, update the documentation. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-9-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- .../admin-guide/kernel-parameters.txt | 8 +- arch/riscv/kernel/acpi.c | 126 ++++++++++++++++++ arch/riscv/kernel/setup.c | 5 + 3 files changed, 135 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9e5bab29685f..d910fba25f2c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1,17 +1,17 @@ - acpi= [HW,ACPI,X86,ARM64] + acpi= [HW,ACPI,X86,ARM64,RISCV64] Advanced Configuration and Power Interface Format: { force | on | off | strict | noirq | rsdt | copy_dsdt } force -- enable ACPI if default was off - on -- enable ACPI but allow fallback to DT [arm64] + on -- enable ACPI but allow fallback to DT [arm64,riscv64] off -- disable ACPI if default was on noirq -- do not use ACPI for IRQ routing strict -- Be less tolerant of platforms that are not strictly ACPI specification compliant. rsdt -- prefer RSDT over (default) XSDT copy_dsdt -- copy DSDT to memory - For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force" - are available + For ARM64 and RISCV64, ONLY "acpi=off", "acpi=on" or + "acpi=force" are available See also Documentation/power/runtime_pm.rst, pci=noacpi diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 81d448c41714..7c080c8cbccf 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -16,6 +16,7 @@ #include #include #include +#include int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -24,6 +25,131 @@ EXPORT_SYMBOL(acpi_disabled); int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); +static bool param_acpi_off __initdata; +static bool param_acpi_on __initdata; +static bool param_acpi_force __initdata; + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) + param_acpi_off = true; + else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */ + param_acpi_on = true; + else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ + param_acpi_force = true; + else + return -EINVAL; /* Core will print when we return error */ + + return 0; +} +early_param("acpi", parse_acpi); + +/* + * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity + * checks on it + * + * Return 0 on success, <0 on failure + */ +static int __init acpi_fadt_sanity_check(void) +{ + struct acpi_table_header *table; + struct acpi_table_fadt *fadt; + acpi_status status; + int ret = 0; + + /* + * FADT is required on riscv; retrieve it to check its presence + * and carry out revision and ACPI HW reduced compliancy tests + */ + status = acpi_get_table(ACPI_SIG_FADT, 0, &table); + if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + + pr_err("Failed to get FADT table, %s\n", msg); + return -ENODEV; + } + + fadt = (struct acpi_table_fadt *)table; + + /* + * The revision in the table header is the FADT's Major revision. The + * FADT also has a minor revision, which is stored in the FADT itself. + * + * TODO: Currently, we check for 6.5 as the minimum version to check + * for HW_REDUCED flag. However, once RISC-V updates are released in + * the ACPI spec, we need to update this check for exact minor revision + */ + if (table->revision < 6 || (table->revision == 6 && fadt->minor_revision < 5)) + pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 6.5+\n", + table->revision, fadt->minor_revision); + + if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { + pr_err("FADT not ACPI hardware reduced compliant\n"); + ret = -EINVAL; + } + + /* + * acpi_get_table() creates FADT table mapping that + * should be released after parsing and before resuming boot + */ + acpi_put_table(table); + return ret; +} + +/* + * acpi_boot_table_init() called from setup_arch(), always. + * 1. find RSDP and get its address, and then find XSDT + * 2. extract all tables and checksums them all + * 3. check ACPI FADT HW reduced flag + * + * We can parse ACPI boot-time tables such as MADT after + * this function is called. + * + * On return ACPI is enabled if either: + * + * - ACPI tables are initialized and sanity checks passed + * - acpi=force was passed in the command line and ACPI was not disabled + * explicitly through acpi=off command line parameter + * + * ACPI is disabled on function return otherwise + */ +void __init acpi_boot_table_init(void) +{ + /* + * Enable ACPI instead of device tree unless + * - ACPI has been disabled explicitly (acpi=off), or + * - firmware has not populated ACPI ptr in EFI system table + * and ACPI has not been [force] enabled (acpi=on|force) + */ + if (param_acpi_off || + (!param_acpi_on && !param_acpi_force && + efi.acpi20 == EFI_INVALID_TABLE_ADDR)) + return; + + /* + * ACPI is disabled at this point. Enable it in order to parse + * the ACPI tables and carry out sanity checks + */ + enable_acpi(); + + /* + * If ACPI tables are initialized and FADT sanity checks passed, + * leave ACPI enabled and carry on booting; otherwise disable ACPI + * on initialization error. + * If acpi=force was passed on the command line it forces ACPI + * to be enabled even if its initialization failed. + */ + if (acpi_table_init() || acpi_fadt_sanity_check()) { + pr_err("Failed to init ACPI tables\n"); + if (!param_acpi_force) + disable_acpi(); + } +} + /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 9fb839074e16..45df7cc88b19 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -8,6 +8,7 @@ * Nick Kossifidis */ +#include #include #include #include @@ -276,6 +277,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); paging_init(); + + /* Parse the ACPI tables for possible boot-time configuration */ + acpi_boot_table_init(); + #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else From f995611994704b5c039731287b897993808e63e3 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:16 +0530 Subject: [PATCH 09/78] RISC-V: ACPI: Cache and retrieve the RINTC structure RINTC structures in the MADT provide mapping between the hartid and the CPU. This is required many times even at run time like cpuinfo. So, instead of parsing the ACPI table every time, cache the RINTC structures and provide a function to get the correct RINTC structure for a given cpu. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230515054928.2079268-10-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/acpi.h | 10 ++++++++ arch/riscv/kernel/acpi.c | 45 +++++++++++++++++++++++++++++++++++ arch/riscv/kernel/setup.c | 4 ++++ 3 files changed, 59 insertions(+) diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 9be52b6ffae1..6519529c8bdf 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -59,6 +59,16 @@ static inline bool acpi_has_cpu_in_madt(void) static inline void arch_fix_phys_package_id(int num, u32 slot) { } +void acpi_init_rintc_map(void); +struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); +u32 get_acpi_id_for_cpu(int cpu); +#else +static inline void acpi_init_rintc_map(void) { } +static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) +{ + return NULL; +} + #endif /* CONFIG_ACPI */ #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 7c080c8cbccf..df5a45a2eb93 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -29,6 +29,8 @@ static bool param_acpi_off __initdata; static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; +static struct acpi_madt_rintc cpu_madt_rintc[NR_CPUS]; + static int __init parse_acpi(char *arg) { if (!arg) @@ -150,6 +152,49 @@ void __init acpi_boot_table_init(void) } } +static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_rintc *rintc = (struct acpi_madt_rintc *)header; + int cpuid; + + if (!(rintc->flags & ACPI_MADT_ENABLED)) + return 0; + + cpuid = riscv_hartid_to_cpuid(rintc->hart_id); + /* + * When CONFIG_SMP is disabled, mapping won't be created for + * all cpus. + * CPUs more than num_possible_cpus, will be ignored. + */ + if (cpuid >= 0 && cpuid < num_possible_cpus()) + cpu_madt_rintc[cpuid] = *rintc; + + return 0; +} + +/* + * Instead of parsing (and freeing) the ACPI table, cache + * the RINTC structures since they are frequently used + * like in cpuinfo. + */ +void __init acpi_init_rintc_map(void) +{ + if (acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_madt_rintc, 0) <= 0) { + pr_err("No valid RINTC entries exist\n"); + BUG(); + } +} + +struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) +{ + return &cpu_madt_rintc[cpu]; +} + +u32 get_acpi_id_for_cpu(int cpu) +{ + return acpi_cpu_get_madt_rintc(cpu)->uid; +} + /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 45df7cc88b19..2ab4cdaa2e68 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -298,6 +299,9 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif + if (!acpi_disabled) + acpi_init_rintc_map(); + riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); apply_boot_alternatives(); From e6b9d8eddb1772d99a676a906d42865293934edd Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:17 +0530 Subject: [PATCH 10/78] drivers/acpi: RISC-V: Add RHCT related code RHCT is a new table defined for RISC-V to communicate the features of the CPU to the OS. Create a new architecture folder in drivers/acpi and add RHCT parsing code. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230515054928.2079268-11-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/acpi.h | 8 ++++ drivers/acpi/Makefile | 2 + drivers/acpi/riscv/Makefile | 2 + drivers/acpi/riscv/rhct.c | 83 +++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 drivers/acpi/riscv/Makefile create mode 100644 drivers/acpi/riscv/rhct.c diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 6519529c8bdf..39471759bec1 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -62,6 +62,8 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void acpi_init_rintc_map(void); struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); u32 get_acpi_id_for_cpu(int cpu); +int acpi_get_riscv_isa(struct acpi_table_header *table, + unsigned int cpu, const char **isa); #else static inline void acpi_init_rintc_map(void) { } static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) @@ -69,6 +71,12 @@ static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return NULL; } +static inline int acpi_get_riscv_isa(struct acpi_table_header *table, + unsigned int cpu, const char **isa) +{ + return -EINVAL; +} + #endif /* CONFIG_ACPI */ #endif /*_ASM_ACPI_H*/ diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index feb36c0b9446..3fc5a0d54f6e 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -131,3 +131,5 @@ obj-y += dptf/ obj-$(CONFIG_ARM64) += arm64/ obj-$(CONFIG_ACPI_VIOT) += viot.o + +obj-$(CONFIG_RISCV) += riscv/ diff --git a/drivers/acpi/riscv/Makefile b/drivers/acpi/riscv/Makefile new file mode 100644 index 000000000000..8b3b126e0b94 --- /dev/null +++ b/drivers/acpi/riscv/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += rhct.o diff --git a/drivers/acpi/riscv/rhct.c b/drivers/acpi/riscv/rhct.c new file mode 100644 index 000000000000..b280b3e9c7d9 --- /dev/null +++ b/drivers/acpi/riscv/rhct.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022-2023, Ventana Micro Systems Inc + * Author: Sunil V L + * + */ + +#define pr_fmt(fmt) "ACPI: RHCT: " fmt + +#include + +static struct acpi_table_header *acpi_get_rhct(void) +{ + static struct acpi_table_header *rhct; + acpi_status status; + + /* + * RHCT will be used at runtime on every CPU, so we + * don't need to call acpi_put_table() to release the table mapping. + */ + if (!rhct) { + status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); + if (ACPI_FAILURE(status)) { + pr_warn_once("No RHCT table found\n"); + return NULL; + } + } + + return rhct; +} + +/* + * During early boot, the caller should call acpi_get_table() and pass its pointer to + * these functions(and free up later). At run time, since this table can be used + * multiple times, NULL may be passed in order to use the cached table. + */ +int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa) +{ + struct acpi_rhct_node_header *node, *ref_node, *end; + u32 size_hdr = sizeof(struct acpi_rhct_node_header); + u32 size_hartinfo = sizeof(struct acpi_rhct_hart_info); + struct acpi_rhct_hart_info *hart_info; + struct acpi_rhct_isa_string *isa_node; + struct acpi_table_rhct *rhct; + u32 *hart_info_node_offset; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + + BUG_ON(acpi_disabled); + + if (!table) { + rhct = (struct acpi_table_rhct *)acpi_get_rhct(); + if (!rhct) + return -ENOENT; + } else { + rhct = (struct acpi_table_rhct *)table; + } + + end = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->header.length); + + for (node = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->node_offset); + node < end; + node = ACPI_ADD_PTR(struct acpi_rhct_node_header, node, node->length)) { + if (node->type == ACPI_RHCT_NODE_TYPE_HART_INFO) { + hart_info = ACPI_ADD_PTR(struct acpi_rhct_hart_info, node, size_hdr); + hart_info_node_offset = ACPI_ADD_PTR(u32, hart_info, size_hartinfo); + if (acpi_cpu_id != hart_info->uid) + continue; + + for (int i = 0; i < hart_info->num_offsets; i++) { + ref_node = ACPI_ADD_PTR(struct acpi_rhct_node_header, + rhct, hart_info_node_offset[i]); + if (ref_node->type == ACPI_RHCT_NODE_TYPE_ISA_STRING) { + isa_node = ACPI_ADD_PTR(struct acpi_rhct_isa_string, + ref_node, size_hdr); + *isa = isa_node->isa; + return 0; + } + } + } + } + + return -1; +} From 61946127ab49d0bb47786b8de2aff73a051e054f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:18 +0530 Subject: [PATCH 11/78] RISC-V: smpboot: Create wrapper setup_smp() setup_smp() currently assumes DT-based platforms. To enable ACPI, first make this a wrapper function and move existing code to a separate DT-specific function. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230515054928.2079268-12-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 445a4efee267..a2e66126b733 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -70,7 +70,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -void __init setup_smp(void) +static void __init of_parse_and_init_cpus(void) { struct device_node *dn; unsigned long hart; @@ -116,6 +116,11 @@ void __init setup_smp(void) } } +void __init setup_smp(void) +{ + of_parse_and_init_cpus(); +} + static int start_secondary_cpu(int cpu, struct task_struct *tidle) { if (cpu_ops[cpu]->cpu_start) From ce92546cd63779445d205e3153defedacf8b08c6 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:19 +0530 Subject: [PATCH 12/78] RISC-V: smpboot: Add ACPI support in setup_smp() Enable SMP boot on ACPI based platforms by using the RINTC structures in the MADT table. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230515054928.2079268-13-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/acpi.h | 2 + arch/riscv/kernel/smpboot.c | 72 ++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 39471759bec1..f71ce21ff684 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -64,6 +64,8 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); u32 get_acpi_id_for_cpu(int cpu); int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); + +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } #else static inline void acpi_init_rintc_map(void) { } static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index a2e66126b733..67bc5ef3e8b2 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -8,6 +8,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -70,6 +71,72 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } +#ifdef CONFIG_ACPI +static unsigned int cpu_count = 1; + +static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const unsigned long end) +{ + unsigned long hart; + static bool found_boot_cpu; + struct acpi_madt_rintc *processor = (struct acpi_madt_rintc *)header; + + /* + * Each RINTC structure in MADT will have a flag. If ACPI_MADT_ENABLED + * bit in the flag is not enabled, it means OS should not try to enable + * the cpu to which RINTC belongs. + */ + if (!(processor->flags & ACPI_MADT_ENABLED)) + return 0; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + + hart = processor->hart_id; + if (hart == INVALID_HARTID) { + pr_warn("Invalid hartid\n"); + return 0; + } + + if (hart == cpuid_to_hartid_map(0)) { + BUG_ON(found_boot_cpu); + found_boot_cpu = true; + early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); + return 0; + } + + if (cpu_count >= NR_CPUS) { + pr_warn("NR_CPUS is too small for the number of ACPI tables.\n"); + return 0; + } + + cpuid_to_hartid_map(cpu_count) = hart; + early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); + cpu_count++; + + return 0; +} + +static void __init acpi_parse_and_init_cpus(void) +{ + int cpuid; + + cpu_set_ops(0); + + acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_rintc, 0); + + for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) { + if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) { + cpu_set_ops(cpuid); + set_cpu_possible(cpuid, true); + } + } +} +#else +#define acpi_parse_and_init_cpus(...) do { } while (0) +#endif + static void __init of_parse_and_init_cpus(void) { struct device_node *dn; @@ -118,7 +185,10 @@ static void __init of_parse_and_init_cpus(void) void __init setup_smp(void) { - of_parse_and_init_cpus(); + if (acpi_disabled) + of_parse_and_init_cpus(); + else + acpi_parse_and_init_cpus(); } static int start_secondary_cpu(int cpu, struct task_struct *tidle) From 914d6f44fc50744163e9bba7178644231f77a46a Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:20 +0530 Subject: [PATCH 13/78] RISC-V: only iterate over possible CPUs in ISA string parser During boot we call riscv_of_processor_hartid() for each hart that we add to the possible cpus list. Repeating the call again here is not required, if we iterate over the list of possible CPUs, rather than the list of all CPUs. The call to of_property_read_string() for "riscv,isa" cannot fail either, as it has previously succeeded in riscv_of_processor_hartid(), but leaving in the error checking makes the operation of the loop more obvious & provides leeway for future refactoring of riscv_of_processor_hartid(). Signed-off-by: Sunil V L Co-developed-by: Conor Dooley Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230515054928.2079268-14-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1d6b7e4b829..c607db2c842c 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +100,7 @@ void __init riscv_fill_hwcap(void) char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; unsigned long isa2hwcap[26] = {0}; - unsigned long hartid; + unsigned int cpu; isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; @@ -112,16 +113,20 @@ void __init riscv_fill_hwcap(void) bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); - for_each_of_cpu_node(node) { + for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); const char *temp; - rc = riscv_of_processor_hartid(node, &hartid); - if (rc < 0) + node = of_cpu_device_node_get(cpu); + if (!node) { + pr_warn("Unable to find cpu node\n"); continue; + } - if (of_property_read_string(node, "riscv,isa", &isa)) { + rc = of_property_read_string(node, "riscv,isa", &isa); + of_node_put(node); + if (rc) { pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); continue; } From 396c018332a10a845e8a555ca3fa288c952a37af Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:21 +0530 Subject: [PATCH 14/78] RISC-V: cpufeature: Add ACPI support in riscv_fill_hwcap() On ACPI based systems, the information about the hart like ISA is provided by the RISC-V Hart Capabilities Table (RHCT). Enable filling up hwcap structure based on the information in RHCT. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-15-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 41 +++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c607db2c842c..6ba8e20c5346 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -6,6 +6,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +102,8 @@ void __init riscv_fill_hwcap(void) char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; unsigned long isa2hwcap[26] = {0}; + struct acpi_table_header *rhct; + acpi_status status; unsigned int cpu; isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; @@ -113,22 +117,36 @@ void __init riscv_fill_hwcap(void) bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); + if (!acpi_disabled) { + status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); + if (ACPI_FAILURE(status)) + return; + } + for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); const char *temp; - node = of_cpu_device_node_get(cpu); - if (!node) { - pr_warn("Unable to find cpu node\n"); - continue; - } + if (acpi_disabled) { + node = of_cpu_device_node_get(cpu); + if (!node) { + pr_warn("Unable to find cpu node\n"); + continue; + } - rc = of_property_read_string(node, "riscv,isa", &isa); - of_node_put(node); - if (rc) { - pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); - continue; + rc = of_property_read_string(node, "riscv,isa", &isa); + of_node_put(node); + if (rc) { + pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); + continue; + } + } else { + rc = acpi_get_riscv_isa(rhct, cpu, &isa); + if (rc < 0) { + pr_warn("Unable to get ISA for the hart - %d\n", cpu); + continue; + } } temp = isa; @@ -265,6 +283,9 @@ void __init riscv_fill_hwcap(void) bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); } + if (!acpi_disabled && rhct) + acpi_put_table((struct acpi_table_header *)rhct); + /* We don't support systems with F but without D, so mask those out * here. */ if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { From 0b144c8189895038cd624035b0cee24869de54f7 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:22 +0530 Subject: [PATCH 15/78] RISC-V: cpu: Enable cpuinfo for ACPI systems On ACPI based platforms, few details like ISA need to be read from the ACPI table. Enable cpuinfo on ACPI based systems. ACPI has nothing similar to DT compatible property for each CPU. Hence, cpuinfo will not print "uarch". Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-16-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c96aa56cf1c7..5de6fb703cc2 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -3,10 +3,12 @@ * Copyright (C) 2012 Regents of the University of California */ +#include #include #include #include #include +#include #include #include #include @@ -283,23 +285,35 @@ static void c_stop(struct seq_file *m, void *v) static int c_show(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; - struct device_node *node = of_get_cpu_node(cpu_id, NULL); struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + struct device_node *node; const char *compat, *isa; seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); - if (!of_property_read_string(node, "riscv,isa", &isa)) - print_isa(m, isa); - print_mmu(m); - if (!of_property_read_string(node, "compatible", &compat) - && strcmp(compat, "riscv")) - seq_printf(m, "uarch\t\t: %s\n", compat); + + if (acpi_disabled) { + node = of_get_cpu_node(cpu_id, NULL); + if (!of_property_read_string(node, "riscv,isa", &isa)) + print_isa(m, isa); + + print_mmu(m); + if (!of_property_read_string(node, "compatible", &compat) && + strcmp(compat, "riscv")) + seq_printf(m, "uarch\t\t: %s\n", compat); + + of_node_put(node); + } else { + if (!acpi_get_riscv_isa(NULL, cpu_id, &isa)) + print_isa(m, isa); + + print_mmu(m); + } + seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); seq_puts(m, "\n"); - of_node_put(node); return 0; } From 7023b9d83f039d849d13a845c8eceea19703de0d Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:23 +0530 Subject: [PATCH 16/78] irqchip/riscv-intc: Add ACPI support Add support for initializing the RISC-V INTC driver on ACPI platforms. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-17-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/irqchip/irq-riscv-intc.c | 76 ++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index f229e3e66387..4adeee1bc391 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -6,6 +6,7 @@ */ #define pr_fmt(fmt) "riscv-intc: " fmt +#include #include #include #include @@ -112,6 +113,30 @@ static struct fwnode_handle *riscv_intc_hwnode(void) return intc_domain->fwnode; } +static int __init riscv_intc_init_common(struct fwnode_handle *fn) +{ + int rc; + + intc_domain = irq_domain_create_linear(fn, BITS_PER_LONG, + &riscv_intc_domain_ops, NULL); + if (!intc_domain) { + pr_err("unable to add IRQ domain\n"); + return -ENXIO; + } + + rc = set_handle_irq(&riscv_intc_irq); + if (rc) { + pr_err("failed to set irq handler\n"); + return rc; + } + + riscv_set_intc_hwnode_fn(riscv_intc_hwnode); + + pr_info("%d local interrupts mapped\n", BITS_PER_LONG); + + return 0; +} + static int __init riscv_intc_init(struct device_node *node, struct device_node *parent) { @@ -133,24 +158,39 @@ static int __init riscv_intc_init(struct device_node *node, if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) return 0; - intc_domain = irq_domain_add_linear(node, BITS_PER_LONG, - &riscv_intc_domain_ops, NULL); - if (!intc_domain) { - pr_err("unable to add IRQ domain\n"); - return -ENXIO; - } - - rc = set_handle_irq(&riscv_intc_irq); - if (rc) { - pr_err("failed to set irq handler\n"); - return rc; - } - - riscv_set_intc_hwnode_fn(riscv_intc_hwnode); - - pr_info("%d local interrupts mapped\n", BITS_PER_LONG); - - return 0; + return riscv_intc_init_common(of_node_to_fwnode(node)); } IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init); + +#ifdef CONFIG_ACPI + +static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct fwnode_handle *fn; + struct acpi_madt_rintc *rintc; + + rintc = (struct acpi_madt_rintc *)header; + + /* + * The ACPI MADT will have one INTC for each CPU (or HART) + * so riscv_intc_acpi_init() function will be called once + * for each INTC. We only do INTC initialization + * for the INTC belonging to the boot CPU (or boot HART). + */ + if (riscv_hartid_to_cpuid(rintc->hart_id) != smp_processor_id()) + return 0; + + fn = irq_domain_alloc_named_fwnode("RISCV-INTC"); + if (!fn) { + pr_err("unable to allocate INTC FW node\n"); + return -ENOMEM; + } + + return riscv_intc_init_common(fn); +} + +IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL, + ACPI_MADT_RINTC_VERSION_V1, riscv_intc_acpi_init); +#endif From cd12d206685af04b30a222a42137a700bff3c7fd Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:24 +0530 Subject: [PATCH 17/78] clocksource/timer-riscv: Refactor riscv_timer_init_dt() Refactor the timer init function such that few things can be shared by both DT and ACPI based platforms. Co-developed-by: Anup Patel Signed-off-by: Anup Patel Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-18-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/clocksource/timer-riscv.c | 81 +++++++++++++++---------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 5f0f10c7e222..cecc4662293b 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -124,61 +124,28 @@ static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int __init riscv_timer_init_dt(struct device_node *n) +static int __init riscv_timer_init_common(void) { - int cpuid, error; - unsigned long hartid; - struct device_node *child; + int error; struct irq_domain *domain; + struct fwnode_handle *intc_fwnode = riscv_get_intc_hwnode(); - error = riscv_of_processor_hartid(n, &hartid); - if (error < 0) { - pr_warn("Not valid hartid for node [%pOF] error = [%lu]\n", - n, hartid); - return error; - } - - cpuid = riscv_hartid_to_cpuid(hartid); - if (cpuid < 0) { - pr_warn("Invalid cpuid for hartid [%lu]\n", hartid); - return cpuid; - } - - if (cpuid != smp_processor_id()) - return 0; - - child = of_find_compatible_node(NULL, NULL, "riscv,timer"); - if (child) { - riscv_timer_cannot_wake_cpu = of_property_read_bool(child, - "riscv,timer-cannot-wake-cpu"); - of_node_put(child); - } - - domain = NULL; - child = of_get_compatible_child(n, "riscv,cpu-intc"); - if (!child) { - pr_err("Failed to find INTC node [%pOF]\n", n); - return -ENODEV; - } - domain = irq_find_host(child); - of_node_put(child); + domain = irq_find_matching_fwnode(intc_fwnode, DOMAIN_BUS_ANY); if (!domain) { - pr_err("Failed to find IRQ domain for node [%pOF]\n", n); + pr_err("Failed to find irq_domain for INTC node [%pfwP]\n", + intc_fwnode); return -ENODEV; } riscv_clock_event_irq = irq_create_mapping(domain, RV_IRQ_TIMER); if (!riscv_clock_event_irq) { - pr_err("Failed to map timer interrupt for node [%pOF]\n", n); + pr_err("Failed to map timer interrupt for node [%pfwP]\n", intc_fwnode); return -ENODEV; } - pr_info("%s: Registering clocksource cpuid [%d] hartid [%lu]\n", - __func__, cpuid, hartid); error = clocksource_register_hz(&riscv_clocksource, riscv_timebase); if (error) { - pr_err("RISCV timer register failed [%d] for cpu = [%d]\n", - error, cpuid); + pr_err("RISCV timer registration failed [%d]\n", error); return error; } @@ -207,4 +174,36 @@ static int __init riscv_timer_init_dt(struct device_node *n) return error; } +static int __init riscv_timer_init_dt(struct device_node *n) +{ + int cpuid, error; + unsigned long hartid; + struct device_node *child; + + error = riscv_of_processor_hartid(n, &hartid); + if (error < 0) { + pr_warn("Invalid hartid for node [%pOF] error = [%lu]\n", + n, hartid); + return error; + } + + cpuid = riscv_hartid_to_cpuid(hartid); + if (cpuid < 0) { + pr_warn("Invalid cpuid for hartid [%lu]\n", hartid); + return cpuid; + } + + if (cpuid != smp_processor_id()) + return 0; + + child = of_find_compatible_node(NULL, NULL, "riscv,timer"); + if (child) { + riscv_timer_cannot_wake_cpu = of_property_read_bool(child, + "riscv,timer-cannot-wake-cpu"); + of_node_put(child); + } + + return riscv_timer_init_common(); +} + TIMER_OF_DECLARE(riscv_timer, "riscv", riscv_timer_init_dt); From 21f4f92410dc302b43c6c8307191704ba93c748d Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:25 +0530 Subject: [PATCH 18/78] clocksource/timer-riscv: Add ACPI support Initialize the timer driver based on RHCT table on ACPI based platforms. Currently, ACPI doesn't support a flag to indicate that the timer interrupt can wake up the cpu irrespective of its power state. It will be added in future update. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-19-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/clocksource/timer-riscv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index cecc4662293b..da3071b387eb 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "riscv-timer: " fmt +#include #include #include #include @@ -207,3 +208,13 @@ static int __init riscv_timer_init_dt(struct device_node *n) } TIMER_OF_DECLARE(riscv_timer, "riscv", riscv_timer_init_dt); + +#ifdef CONFIG_ACPI +static int __init riscv_timer_acpi_init(struct acpi_table_header *table) +{ + return riscv_timer_init_common(); +} + +TIMER_ACPI_DECLARE(aclint_mtimer, ACPI_SIG_RHCT, riscv_timer_acpi_init); + +#endif From 714aa1d1c8cad1c005bb93a1ba46dfa145ec2e6f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:26 +0530 Subject: [PATCH 19/78] RISC-V: time.c: Add ACPI support for time_init() On ACPI based platforms, timer related information is available in RHCT. Add ACPI based probe support to the timer initialization. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-20-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/time.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index babaf3b48ba8..23641e82a9df 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -18,17 +19,29 @@ EXPORT_SYMBOL_GPL(riscv_timebase); void __init time_init(void) { struct device_node *cpu; + struct acpi_table_rhct *rhct; + acpi_status status; u32 prop; - cpu = of_find_node_by_path("/cpus"); - if (!cpu || of_property_read_u32(cpu, "timebase-frequency", &prop)) - panic(KERN_WARNING "RISC-V system with no 'timebase-frequency' in DTS\n"); - of_node_put(cpu); - riscv_timebase = prop; + if (acpi_disabled) { + cpu = of_find_node_by_path("/cpus"); + if (!cpu || of_property_read_u32(cpu, "timebase-frequency", &prop)) + panic("RISC-V system with no 'timebase-frequency' in DTS\n"); + + of_node_put(cpu); + riscv_timebase = prop; + of_clk_init(NULL); + } else { + status = acpi_get_table(ACPI_SIG_RHCT, 0, (struct acpi_table_header **)&rhct); + if (ACPI_FAILURE(status)) + panic("RISC-V ACPI system with no RHCT table\n"); + + riscv_timebase = rhct->time_base_freq; + acpi_put_table((struct acpi_table_header *)rhct); + } lpj_fine = riscv_timebase / HZ; - of_clk_init(NULL); timer_probe(); tick_setup_hrtimer_broadcast(); From 0b8e15ca008260cf28b354e27f3d2824f33a18b2 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:27 +0530 Subject: [PATCH 20/78] RISC-V: Enable ACPI in defconfig Add support to build ACPI subsystem in defconfig. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230515054928.2079268-21-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d98d6e90b2b8..d3d1fbf2dd5f 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -37,6 +37,7 @@ CONFIG_PM=y CONFIG_CPU_IDLE=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m +CONFIG_ACPI=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y From cc9e654a7e81684091ade9c9d16d8a4e0b6cf53b Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 15 May 2023 11:19:28 +0530 Subject: [PATCH 21/78] MAINTAINERS: Add entry for drivers/acpi/riscv ACPI defines few RISC-V specific tables which need parsing code added in drivers/acpi/riscv. Add maintainer entries for this newly created folder. Signed-off-by: Sunil V L Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230515054928.2079268-22-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7e0b87d5aa2e..0d6ecb5a4107 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -412,6 +412,13 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/serial-multi-instantiate.c +ACPI FOR RISC-V (ACPI/riscv) +M: Sunil V L +L: linux-acpi@vger.kernel.org +L: linux-riscv@lists.infradead.org +S: Maintained +F: drivers/acpi/riscv/ + ACPI PCC(Platform Communication Channel) MAILBOX DRIVER M: Sudeep Holla L: linux-acpi@vger.kernel.org From 255b34d799ddaaef5e8672b96c47a3b94fe85da9 Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Tue, 2 May 2023 00:17:38 +0800 Subject: [PATCH 22/78] riscv: allow case-insensitive ISA string parsing According to RISC-V Hart Capabilities Table (RHCT) description in UEFI Forum ECR, the format of the ISA string is defined in the RISC-V unprivileged specification which is case-insensitive. However, the current ISA string parser in the kernel does not support ISA strings with uppercase letters. This patch modifies the ISA string parser in the kernel to support case-insensitive ISA string parsing. Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Signed-off-by: Yangyu Chen Link: https://lore.kernel.org/r/tencent_B30EED51C7235CA1988890E5C658BE35C107@qq.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 3 ++- arch/riscv/kernel/cpufeature.c | 35 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c96aa56cf1c7..9d3a5363037b 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -42,7 +43,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart); return -ENODEV; } - if (isa[0] != 'r' || isa[1] != 'v') { + if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') { pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa); return -ENODEV; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1d6b7e4b829..157687965ce5 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -127,13 +127,10 @@ void __init riscv_fill_hwcap(void) } temp = isa; -#if IS_ENABLED(CONFIG_32BIT) - if (!strncmp(isa, "rv32", 4)) + if (IS_ENABLED(CONFIG_32BIT) && !strncasecmp(isa, "rv32", 4)) isa += 4; -#elif IS_ENABLED(CONFIG_64BIT) - if (!strncmp(isa, "rv64", 4)) + else if (IS_ENABLED(CONFIG_64BIT) && !strncasecmp(isa, "rv64", 4)) isa += 4; -#endif /* The riscv,isa DT property must start with rv64 or rv32 */ if (temp == isa) continue; @@ -157,13 +154,15 @@ void __init riscv_fill_hwcap(void) break; } fallthrough; + case 'S': case 'x': + case 'X': case 'z': + case 'Z': ext_long = true; /* Multi-letter extension must be delimited */ for (; *isa && *isa != '_'; ++isa) - if (unlikely(!islower(*isa) - && !isdigit(*isa))) + if (unlikely(!isalnum(*isa))) ext_err = true; /* Parse backwards */ ext_end = isa; @@ -174,7 +173,7 @@ void __init riscv_fill_hwcap(void) /* Skip the minor version */ while (isdigit(*--ext_end)) ; - if (ext_end[0] != 'p' + if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) { /* Advance it to offset the pre-decrement */ ++ext_end; @@ -186,7 +185,7 @@ void __init riscv_fill_hwcap(void) ++ext_end; break; default: - if (unlikely(!islower(*ext))) { + if (unlikely(!isalpha(*ext))) { ext_err = true; break; } @@ -196,7 +195,7 @@ void __init riscv_fill_hwcap(void) /* Skip the minor version */ while (isdigit(*++isa)) ; - if (*isa != 'p') + if (tolower(*isa) != 'p') break; if (!isdigit(*++isa)) { --isa; @@ -210,18 +209,18 @@ void __init riscv_fill_hwcap(void) if (*isa != '_') --isa; -#define SET_ISA_EXT_MAP(name, bit) \ - do { \ - if ((ext_end - ext == sizeof(name) - 1) && \ - !memcmp(ext, name, sizeof(name) - 1) && \ - riscv_isa_extension_check(bit)) \ - set_bit(bit, this_isa); \ - } while (false) \ +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !strncasecmp(ext, name, sizeof(name) - 1) && \ + riscv_isa_extension_check(bit)) \ + set_bit(bit, this_isa); \ + } while (false) \ if (unlikely(ext_err)) continue; if (!ext_long) { - int nr = *ext - 'a'; + int nr = tolower(*ext) - 'a'; if (riscv_isa_extension_check(nr)) { this_hwcap |= isa2hwcap[nr]; From 9e320d7ca46aecf565c3900452acae579a7d0a9a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 2 May 2023 00:17:39 +0800 Subject: [PATCH 23/78] dt-bindings: riscv: drop invalid comment about riscv,isa lower-case reasoning "Ease of parsing" may have been the initial argument for keeping this string in lower-case, but parsers may have been written that expect lower-case only. For example, the one in released kernels currently does not behave correctly for multi-letter extensions that begin with a capital letter. Allowing upper-case here brings about no benefit but would break compatibility between new devicetrees and older kernels. Drop the comment to avoid confusing people. Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Acked-by: Rob Herring Signed-off-by: Yangyu Chen Link: https://lore.kernel.org/r/tencent_3B8290DDC66D3E624132ED39C7465CDC9807@qq.com Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 3d2934b15e80..db5253a2a74a 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -94,7 +94,7 @@ properties: While the isa strings in ISA specification are case insensitive, letters in the riscv,isa string must be all - lowercase to simplify parsing. + lowercase. $ref: "/schemas/types.yaml#/definitions/string" pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ From 419d5d38ac5d79dfd899522274c872854cfe17ac Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 5 Jun 2023 11:06:58 +0000 Subject: [PATCH 24/78] riscv: Rename __switch_to_aux() -> fpu The name of __switch_to_aux() is not clear and rename it with the determine function: __switch_to_fpu(). Next we could add other regs' switch. Signed-off-by: Guo Ren Signed-off-by: Guo Ren Signed-off-by: Greentime Hu Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Andy Chiu Tested-by: Heiko Stuebner Reviewed-by: Heiko Stuebner Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230605110724.21391-2-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/switch_to.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 60f8ca01d36e..4b96b13dee27 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -46,7 +46,7 @@ static inline void fstate_restore(struct task_struct *task, } } -static inline void __switch_to_aux(struct task_struct *prev, +static inline void __switch_to_fpu(struct task_struct *prev, struct task_struct *next) { struct pt_regs *regs; @@ -66,7 +66,7 @@ static __always_inline bool has_fpu(void) static __always_inline bool has_fpu(void) { return false; } #define fstate_save(task, regs) do { } while (0) #define fstate_restore(task, regs) do { } while (0) -#define __switch_to_aux(__prev, __next) do { } while (0) +#define __switch_to_fpu(__prev, __next) do { } while (0) #endif extern struct task_struct *__switch_to(struct task_struct *, @@ -77,7 +77,7 @@ do { \ struct task_struct *__prev = (prev); \ struct task_struct *__next = (next); \ if (has_fpu()) \ - __switch_to_aux(__prev, __next); \ + __switch_to_fpu(__prev, __next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) From dc6667a4e7e36f283bcd0264a0be55adae4d6f86 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 5 Jun 2023 11:06:59 +0000 Subject: [PATCH 25/78] riscv: Extending cpufeature.c to detect V-extension Add V-extension into riscv_isa_ext_keys array and detect it with isa string parsing. Signed-off-by: Guo Ren Signed-off-by: Guo Ren Signed-off-by: Greentime Hu Suggested-by: Vineet Gupta Co-developed-by: Andy Chiu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-3-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/include/asm/vector.h | 26 ++++++++++++++++++++++++++ arch/riscv/include/uapi/asm/hwcap.h | 1 + arch/riscv/kernel/cpufeature.c | 11 +++++++++++ 4 files changed, 39 insertions(+) create mode 100644 arch/riscv/include/asm/vector.h diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e0c40a4c63d5..574385930ba7 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -22,6 +22,7 @@ #define RISCV_ISA_EXT_m ('m' - 'a') #define RISCV_ISA_EXT_s ('s' - 'a') #define RISCV_ISA_EXT_u ('u' - 'a') +#define RISCV_ISA_EXT_v ('v' - 'a') /* * These macros represent the logical IDs of each multi-letter RISC-V ISA diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h new file mode 100644 index 000000000000..bdbb05b70151 --- /dev/null +++ b/arch/riscv/include/asm/vector.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 SiFive + */ + +#ifndef __ASM_RISCV_VECTOR_H +#define __ASM_RISCV_VECTOR_H + +#include + +#ifdef CONFIG_RISCV_ISA_V + +#include + +static __always_inline bool has_vector(void) +{ + return riscv_has_extension_unlikely(RISCV_ISA_EXT_v); +} + +#else /* ! CONFIG_RISCV_ISA_V */ + +static __always_inline bool has_vector(void) { return false; } + +#endif /* CONFIG_RISCV_ISA_V */ + +#endif /* ! __ASM_RISCV_VECTOR_H */ diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h index 46dc3f5ee99f..c52bb7bbbabe 100644 --- a/arch/riscv/include/uapi/asm/hwcap.h +++ b/arch/riscv/include/uapi/asm/hwcap.h @@ -21,5 +21,6 @@ #define COMPAT_HWCAP_ISA_F (1 << ('F' - 'A')) #define COMPAT_HWCAP_ISA_D (1 << ('D' - 'A')) #define COMPAT_HWCAP_ISA_C (1 << ('C' - 'A')) +#define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A')) #endif /* _UAPI_ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1d6b7e4b829..7aaf92fff64e 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -107,6 +107,7 @@ void __init riscv_fill_hwcap(void) isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; + isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V; elf_hwcap = 0; @@ -267,6 +268,16 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } + if (elf_hwcap & COMPAT_HWCAP_ISA_V) { + /* + * ISA string in device tree might have 'v' flag, but + * CONFIG_RISCV_ISA_V is disabled in kernel. + * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled. + */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + elf_hwcap &= ~COMPAT_HWCAP_ISA_V; + } + memset(print_str, 0, sizeof(print_str)); for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) From 162e4df137c1fea6557fda3e4cdf5dc6ca6d5510 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:00 +0000 Subject: [PATCH 26/78] riscv: hwprobe: Add support for probing V in RISCV_HWPROBE_KEY_IMA_EXT_0 Probing kernel support for Vector extension is available now. This only add detection for V only. Extenions like Zvfh, Zk are not in this scope. Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Evan Green Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-4-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/hwprobe.rst | 3 +++ arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/kernel/sys_riscv.c | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst index 9f0dd62dcb5d..7431d9d01c73 100644 --- a/Documentation/riscv/hwprobe.rst +++ b/Documentation/riscv/hwprobe.rst @@ -64,6 +64,9 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined by version 2.2 of the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by + version 1.0 of the RISC-V Vector extension manual. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance information about the selected set of processors. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 8d745a4ad8a2..7c6fdcf7ced5 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -25,6 +25,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_IMA_V (1 << 2) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 5db29683ebee..88357a848797 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -171,6 +172,9 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, if (riscv_isa_extension_available(NULL, c)) pair->value |= RISCV_HWPROBE_IMA_C; + if (has_vector()) + pair->value |= RISCV_HWPROBE_IMA_V; + break; case RISCV_HWPROBE_KEY_CPUPERF_0: From b5665d2a94325c3244584f504d039a573cfd63e8 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:01 +0000 Subject: [PATCH 27/78] riscv: Add new csr defines related to vector extension Follow the riscv vector spec to add new csr numbers. Acked-by: Guo Ren Co-developed-by: Guo Ren Signed-off-by: Guo Ren Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Reviewed-by: Palmer Dabbelt Suggested-by: Vineet Gupta Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-5-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index b6acb7ed115f..b98b3b6c9da2 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -24,16 +24,24 @@ #define SR_FS_CLEAN _AC(0x00004000, UL) #define SR_FS_DIRTY _AC(0x00006000, UL) +#define SR_VS _AC(0x00000600, UL) /* Vector Status */ +#define SR_VS_OFF _AC(0x00000000, UL) +#define SR_VS_INITIAL _AC(0x00000200, UL) +#define SR_VS_CLEAN _AC(0x00000400, UL) +#define SR_VS_DIRTY _AC(0x00000600, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) #define SR_XS_CLEAN _AC(0x00010000, UL) #define SR_XS_DIRTY _AC(0x00018000, UL) +#define SR_FS_VS (SR_FS | SR_VS) /* Vector and Floating-Point Unit */ + #ifndef CONFIG_64BIT -#define SR_SD _AC(0x80000000, UL) /* FS/XS dirty */ +#define SR_SD _AC(0x80000000, UL) /* FS/VS/XS dirty */ #else -#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */ +#define SR_SD _AC(0x8000000000000000, UL) /* FS/VS/XS dirty */ #endif #ifdef CONFIG_64BIT @@ -375,6 +383,12 @@ #define CSR_MVIPH 0x319 #define CSR_MIPH 0x354 +#define CSR_VSTART 0x8 +#define CSR_VCSR 0xf +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 +#define CSR_VLENB 0xc22 + #ifdef CONFIG_RISCV_M_MODE # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE From 6b533828726af4e3609aeb6e5f494e936f9a7cde Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:02 +0000 Subject: [PATCH 28/78] riscv: Clear vector regfile on bootup clear vector registers on boot if kernel supports V. Signed-off-by: Greentime Hu Signed-off-by: Vineet Gupta Signed-off-by: Andy Chiu Acked-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-6-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 4bf6c449d78b..3fd6a4bd9c3e 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -392,7 +392,7 @@ ENTRY(reset_regs) #ifdef CONFIG_FPU csrr t0, CSR_MISA andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D) - beqz t0, .Lreset_regs_done + beqz t0, .Lreset_regs_done_fpu li t1, SR_FS csrs CSR_STATUS, t1 @@ -430,8 +430,31 @@ ENTRY(reset_regs) fmv.s.x f31, zero csrw fcsr, 0 /* note that the caller must clear SR_FS */ +.Lreset_regs_done_fpu: #endif /* CONFIG_FPU */ -.Lreset_regs_done: + +#ifdef CONFIG_RISCV_ISA_V + csrr t0, CSR_MISA + li t1, COMPAT_HWCAP_ISA_V + and t0, t0, t1 + beqz t0, .Lreset_regs_done_vector + + /* + * Clear vector registers and reset vcsr + * VLMAX has a defined value, VLEN is a constant, + * and this form of vsetvli is defined to set vl to VLMAX. + */ + li t1, SR_VS + csrs CSR_STATUS, t1 + csrs CSR_VCSR, x0 + vsetvli t1, x0, e8, m8, ta, ma + vmv.v.i v0, 0 + vmv.v.i v8, 0 + vmv.v.i v16, 0 + vmv.v.i v24, 0 + /* note that the caller must clear SR_VS */ +.Lreset_regs_done_vector: +#endif /* CONFIG_RISCV_ISA_V */ ret END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ From 74abe5a39d3a110f4c87c8ff34b80705009a96e0 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 5 Jun 2023 11:07:03 +0000 Subject: [PATCH 29/78] riscv: Disable Vector Instructions for kernel itself Disable vector instructions execution for kernel mode at its entrances. This helps find illegal uses of vector in the kernel space, which is similar to the fpu. Signed-off-by: Guo Ren Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Co-developed-by: Han-Kuan Chen Signed-off-by: Han-Kuan Chen Co-developed-by: Greentime Hu Signed-off-by: Greentime Hu Signed-off-by: Vineet Gupta Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-7-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 6 +++--- arch/riscv/kernel/head.S | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 3fbb100bc9e4..e9ae284a55c1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -48,10 +48,10 @@ _save_context: * Disable user-mode memory access as it should only be set in the * actual user copy routines. * - * Disable the FPU to detect illegal usage of floating point in kernel - * space. + * Disable the FPU/Vector to detect illegal usage of floating point + * or vector in kernel space. */ - li t0, SR_SUM | SR_FS + li t0, SR_SUM | SR_FS_VS REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, CSR_STATUS, t0 diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 3fd6a4bd9c3e..e16bb2185d55 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -140,10 +140,10 @@ secondary_start_sbi: .option pop /* - * Disable FPU to detect illegal usage of - * floating point in kernel space + * Disable FPU & VECTOR to detect illegal usage of + * floating point or vector in kernel space */ - li t0, SR_FS + li t0, SR_FS_VS csrc CSR_STATUS, t0 /* Set trap vector to spin forever to help debug */ @@ -234,10 +234,10 @@ pmp_done: .option pop /* - * Disable FPU to detect illegal usage of - * floating point in kernel space + * Disable FPU & VECTOR to detect illegal usage of + * floating point or vector in kernel space */ - li t0, SR_FS + li t0, SR_FS_VS csrc CSR_STATUS, t0 #ifdef CONFIG_RISCV_BOOT_SPINWAIT From 0a3381a01dcc3d0537732794c007f32e4dfd1efc Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:04 +0000 Subject: [PATCH 30/78] riscv: Introduce Vector enable/disable helpers These are small and likely to be frequently called so implement as inline routines (vs. function call). Co-developed-by: Guo Ren Signed-off-by: Guo Ren Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Vineet Gupta Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-8-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vector.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index bdbb05b70151..51bb37232943 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -11,12 +11,23 @@ #ifdef CONFIG_RISCV_ISA_V #include +#include static __always_inline bool has_vector(void) { return riscv_has_extension_unlikely(RISCV_ISA_EXT_v); } +static __always_inline void riscv_v_enable(void) +{ + csr_set(CSR_SSTATUS, SR_VS); +} + +static __always_inline void riscv_v_disable(void) +{ + csr_clear(CSR_SSTATUS, SR_VS); +} + #else /* ! CONFIG_RISCV_ISA_V */ static __always_inline bool has_vector(void) { return false; } From 7017858eb2d7ed7a295be02c71124049a6409295 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:05 +0000 Subject: [PATCH 31/78] riscv: Introduce riscv_v_vsize to record size of Vector context This patch is used to detect the size of CPU vector registers and use riscv_v_vsize to save the size of all the vector registers. It assumes all harts has the same capabilities in a SMP system. If a core detects VLENB that is different from the boot core, then it warns and turns off V support for user space. Co-developed-by: Guo Ren Signed-off-by: Guo Ren Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-9-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vector.h | 8 ++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/cpufeature.c | 2 ++ arch/riscv/kernel/smpboot.c | 7 +++++++ arch/riscv/kernel/vector.c | 36 +++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+) create mode 100644 arch/riscv/kernel/vector.c diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 51bb37232943..df3b5caecc87 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -7,12 +7,16 @@ #define __ASM_RISCV_VECTOR_H #include +#include #ifdef CONFIG_RISCV_ISA_V #include #include +extern unsigned long riscv_v_vsize; +int riscv_v_setup_vsize(void); + static __always_inline bool has_vector(void) { return riscv_has_extension_unlikely(RISCV_ISA_EXT_v); @@ -30,7 +34,11 @@ static __always_inline void riscv_v_disable(void) #else /* ! CONFIG_RISCV_ISA_V */ +struct pt_regs; + +static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } +#define riscv_v_vsize (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fbdccc21418a..c51f34c2756a 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 7aaf92fff64e..28032b083463 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -18,6 +18,7 @@ #include #include #include +#include #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -269,6 +270,7 @@ void __init riscv_fill_hwcap(void) } if (elf_hwcap & COMPAT_HWCAP_ISA_V) { + riscv_v_setup_vsize(); /* * ISA string in device tree might have 'v' flag, but * CONFIG_RISCV_ISA_V is disabled in kernel. diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 445a4efee267..66011bf2b36e 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include "head.h" @@ -169,6 +171,11 @@ asmlinkage __visible void smp_callin(void) set_cpu_online(curr_cpuid, 1); probe_vendor_features(curr_cpuid); + if (has_vector()) { + if (riscv_v_setup_vsize()) + elf_hwcap &= ~COMPAT_HWCAP_ISA_V; + } + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case. diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c new file mode 100644 index 000000000000..120f1ce9abf9 --- /dev/null +++ b/arch/riscv/kernel/vector.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SiFive + * Author: Andy Chiu + */ +#include + +#include +#include +#include +#include + +unsigned long riscv_v_vsize __read_mostly; +EXPORT_SYMBOL_GPL(riscv_v_vsize); + +int riscv_v_setup_vsize(void) +{ + unsigned long this_vsize; + + /* There are 32 vector registers with vlenb length. */ + riscv_v_enable(); + this_vsize = csr_read(CSR_VLENB) * 32; + riscv_v_disable(); + + if (!riscv_v_vsize) { + riscv_v_vsize = this_vsize; + return 0; + } + + if (riscv_v_vsize != this_vsize) { + WARN(1, "RISCV_ISA_V only supports one vlenb on SMP systems"); + return -EOPNOTSUPP; + } + + return 0; +} From 03c3fcd9941a172abdea84456eefce2d2b7b415c Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:06 +0000 Subject: [PATCH 32/78] riscv: Introduce struct/helpers to save/restore per-task Vector state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add vector state context struct to be added later in thread_struct. And prepare low-level helper functions to save/restore vector contexts. This include Vector Regfile and CSRs holding dynamic configuration state (vstart, vl, vtype, vcsr). The Vec Register width could be implementation defined, but same for all processes, so that is saved separately. This is not yet wired into final thread_struct - will be done when __switch_to actually starts doing this in later patches. Given the variable (and potentially large) size of regfile, they are saved in dynamically allocated memory, pointed to by datap pointer in __riscv_v_ext_state. Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Vineet Gupta Signed-off-by: Andy Chiu Acked-by: Conor Dooley Reviewed-by: Guo Ren Reviewed-by: Björn Töpel Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-10-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vector.h | 95 ++++++++++++++++++++++++++++ arch/riscv/include/uapi/asm/ptrace.h | 17 +++++ 2 files changed, 112 insertions(+) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index df3b5caecc87..3c29f4eb552a 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -11,8 +11,10 @@ #ifdef CONFIG_RISCV_ISA_V +#include #include #include +#include extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); @@ -22,6 +24,26 @@ static __always_inline bool has_vector(void) return riscv_has_extension_unlikely(RISCV_ISA_EXT_v); } +static inline void __riscv_v_vstate_clean(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; +} + +static inline void riscv_v_vstate_off(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; +} + +static inline void riscv_v_vstate_on(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; +} + +static inline bool riscv_v_vstate_query(struct pt_regs *regs) +{ + return (regs->status & SR_VS) != 0; +} + static __always_inline void riscv_v_enable(void) { csr_set(CSR_SSTATUS, SR_VS); @@ -32,13 +54,86 @@ static __always_inline void riscv_v_disable(void) csr_clear(CSR_SSTATUS, SR_VS); } +static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) +{ + asm volatile ( + "csrr %0, " __stringify(CSR_VSTART) "\n\t" + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" + "csrr %2, " __stringify(CSR_VL) "\n\t" + "csrr %3, " __stringify(CSR_VCSR) "\n\t" + : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), + "=r" (dest->vcsr) : :); +} + +static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) +{ + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvl x0, %2, %1\n\t" + ".option pop\n\t" + "csrw " __stringify(CSR_VSTART) ", %0\n\t" + "csrw " __stringify(CSR_VCSR) ", %3\n\t" + : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), + "r" (src->vcsr) :); +} + +static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, + void *datap) +{ + unsigned long vl; + + riscv_v_enable(); + __vstate_csr_save(save_to); + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + riscv_v_disable(); +} + +static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_from, + void *datap) +{ + unsigned long vl; + + riscv_v_enable(); + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vle8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + __vstate_csr_restore(restore_from); + riscv_v_disable(); +} + #else /* ! CONFIG_RISCV_ISA_V */ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } +static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } #define riscv_v_vsize (0) +#define riscv_v_vstate_off(regs) do {} while (0) +#define riscv_v_vstate_on(regs) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 882547f6bd5c..586786d023c4 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -77,6 +77,23 @@ union __riscv_fp_state { struct __riscv_q_ext_state q; }; +struct __riscv_v_ext_state { + unsigned long vstart; + unsigned long vl; + unsigned long vtype; + unsigned long vcsr; + void *datap; + /* + * In signal handler, datap will be set a correct user stack offset + * and vector registers will be copied to the address of datap + * pointer. + * + * In ptrace syscall, datap will be set to zero and the vector + * registers will be copied to the address right after this + * structure. + */ +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_ASM_RISCV_PTRACE_H */ From 3a2df6323defbb42234aaae804a8ad6af397016a Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:07 +0000 Subject: [PATCH 33/78] riscv: Add task switch support for vector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds task switch support for vector. It also supports all lengths of vlen. Suggested-by: Andrew Waterman Co-developed-by: Nick Knight Signed-off-by: Nick Knight Co-developed-by: Guo Ren Signed-off-by: Guo Ren Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Co-developed-by: Ruinland Tsai Signed-off-by: Ruinland Tsai Signed-off-by: Greentime Hu Signed-off-by: Vineet Gupta Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Björn Töpel Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-11-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 1 + arch/riscv/include/asm/switch_to.h | 3 +++ arch/riscv/include/asm/thread_info.h | 3 +++ arch/riscv/include/asm/vector.h | 38 ++++++++++++++++++++++++++++ arch/riscv/kernel/process.c | 19 ++++++++++++++ 5 files changed, 64 insertions(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 94a0590c6971..f0ddf691ac5e 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -39,6 +39,7 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; + struct __riscv_v_ext_state vstate; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 4b96b13dee27..a727be723c56 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -78,6 +79,8 @@ do { \ struct task_struct *__next = (next); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ + if (has_vector()) \ + __switch_to_vector(__prev, __next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index e0d202134b44..97e6f65ec176 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -81,6 +81,9 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } +void arch_release_task_struct(struct task_struct *tsk); +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 3c29f4eb552a..ce6a75e9cf62 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -12,6 +12,9 @@ #ifdef CONFIG_RISCV_ISA_V #include +#include +#include +#include #include #include #include @@ -124,6 +127,38 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ riscv_v_disable(); } +static inline void riscv_v_vstate_save(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) == SR_VS_DIRTY) { + struct __riscv_v_ext_state *vstate = &task->thread.vstate; + + __riscv_v_vstate_save(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + } +} + +static inline void riscv_v_vstate_restore(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) != SR_VS_OFF) { + struct __riscv_v_ext_state *vstate = &task->thread.vstate; + + __riscv_v_vstate_restore(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + } +} + +static inline void __switch_to_vector(struct task_struct *prev, + struct task_struct *next) +{ + struct pt_regs *regs; + + regs = task_pt_regs(prev); + riscv_v_vstate_save(prev, regs); + riscv_v_vstate_restore(next, task_pt_regs(next)); +} + #else /* ! CONFIG_RISCV_ISA_V */ struct pt_regs; @@ -132,6 +167,9 @@ static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } #define riscv_v_vsize (0) +#define riscv_v_vstate_save(task, regs) do {} while (0) +#define riscv_v_vstate_restore(task, regs) do {} while (0) +#define __switch_to_vector(__prev, __next) do {} while (0) #define riscv_v_vstate_off(regs) do {} while (0) #define riscv_v_vstate_on(regs) do {} while (0) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e2a060066730..78eb5ac45888 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include register unsigned long gp_in_global __asm__("gp"); @@ -146,12 +147,28 @@ void flush_thread(void) fstate_off(current, task_pt_regs(current)); memset(¤t->thread.fstate, 0, sizeof(current->thread.fstate)); #endif +#ifdef CONFIG_RISCV_ISA_V + /* Reset vector state */ + riscv_v_vstate_off(task_pt_regs(current)); + kfree(current->thread.vstate.datap); + memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); +#endif +} + +void arch_release_task_struct(struct task_struct *tsk) +{ + /* Free the vector context of datap. */ + if (has_vector()) + kfree(tsk->thread.vstate.datap); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { fstate_save(src, task_pt_regs(src)); *dst = *src; + /* clear entire V context, including datap for a new task */ + memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + return 0; } @@ -176,6 +193,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[1] = (unsigned long)args->fn_arg; } else { *childregs = *(current_pt_regs()); + /* Turn off status.VS */ + riscv_v_vstate_off(childregs); if (usp) /* User fork */ childregs->sp = usp; if (clone_flags & CLONE_SETTLS) From cd054837243b5f36ff395c21135ff153871180f1 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:08 +0000 Subject: [PATCH 34/78] riscv: Allocate user's vector context in the first-use trap Vector unit is disabled by default for all user processes. Thus, a process will take a trap (illegal instruction) into kernel at the first time when it uses Vector. Only after then, the kernel allocates V context and starts take care of the context for that user process. Suggested-by: Richard Henderson Link: https://lore.kernel.org/r/3923eeee-e4dc-0911-40bf-84c34aee962d@linaro.org Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230605110724.21391-12-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/insn.h | 29 ++++++++++ arch/riscv/include/asm/vector.h | 2 + arch/riscv/kernel/traps.c | 26 ++++++++- arch/riscv/kernel/vector.c | 95 +++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 8d5c84f2d5ef..4e1505cef8aa 100644 --- a/arch/riscv/include/asm/insn.h +++ b/arch/riscv/include/asm/insn.h @@ -137,6 +137,26 @@ #define RVG_OPCODE_JALR 0x67 #define RVG_OPCODE_JAL 0x6f #define RVG_OPCODE_SYSTEM 0x73 +#define RVG_SYSTEM_CSR_OFF 20 +#define RVG_SYSTEM_CSR_MASK GENMASK(12, 0) + +/* parts of opcode for RVF, RVD and RVQ */ +#define RVFDQ_FL_FS_WIDTH_OFF 12 +#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0) +#define RVFDQ_FL_FS_WIDTH_W 2 +#define RVFDQ_FL_FS_WIDTH_D 3 +#define RVFDQ_LS_FS_WIDTH_Q 4 +#define RVFDQ_OPCODE_FL 0x07 +#define RVFDQ_OPCODE_FS 0x27 + +/* parts of opcode for RVV */ +#define RVV_OPCODE_VECTOR 0x57 +#define RVV_VL_VS_WIDTH_8 0 +#define RVV_VL_VS_WIDTH_16 5 +#define RVV_VL_VS_WIDTH_32 6 +#define RVV_VL_VS_WIDTH_64 7 +#define RVV_OPCODE_VL RVFDQ_OPCODE_FL +#define RVV_OPCODE_VS RVFDQ_OPCODE_FS /* parts of opcode for RVC*/ #define RVC_OPCODE_C0 0x0 @@ -304,6 +324,15 @@ static __always_inline bool riscv_insn_is_branch(u32 code) (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \ (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); }) +#define RVG_EXTRACT_SYSTEM_CSR(x) \ + ({typeof(x) x_ = (x); RV_X(x_, RVG_SYSTEM_CSR_OFF, RVG_SYSTEM_CSR_MASK); }) + +#define RVFDQ_EXTRACT_FL_FS_WIDTH(x) \ + ({typeof(x) x_ = (x); RV_X(x_, RVFDQ_FL_FS_WIDTH_OFF, \ + RVFDQ_FL_FS_WIDTH_MASK); }) + +#define RVV_EXRACT_VL_VS_WIDTH(x) RVFDQ_EXTRACT_FL_FS_WIDTH(x) + /* * Get the immediate from a J-type instruction. * diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index ce6a75e9cf62..8e56da67b5cf 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -21,6 +21,7 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); +bool riscv_v_first_use_handler(struct pt_regs *regs); static __always_inline bool has_vector(void) { @@ -165,6 +166,7 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } +static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_save(task, regs) do {} while (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8c258b78c925..05ffdcd1424e 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include int show_unhandled_signals = 1; @@ -145,8 +146,29 @@ DO_ERROR_INFO(do_trap_insn_misaligned, SIGBUS, BUS_ADRALN, "instruction address misaligned"); DO_ERROR_INFO(do_trap_insn_fault, SIGSEGV, SEGV_ACCERR, "instruction access fault"); -DO_ERROR_INFO(do_trap_insn_illegal, - SIGILL, ILL_ILLOPC, "illegal instruction"); + +asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs) +{ + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + local_irq_enable(); + + if (!riscv_v_first_use_handler(regs)) + do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, + "Oops - illegal instruction"); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, + "Oops - illegal instruction"); + + irqentry_nmi_exit(regs, state); + } +} + DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); #ifndef CONFIG_RISCV_M_MODE diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 120f1ce9abf9..9d81d1b2a7f3 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -4,10 +4,19 @@ * Author: Andy Chiu */ #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include unsigned long riscv_v_vsize __read_mostly; @@ -34,3 +43,89 @@ int riscv_v_setup_vsize(void) return 0; } + +static bool insn_is_vector(u32 insn_buf) +{ + u32 opcode = insn_buf & __INSN_OPCODE_MASK; + u32 width, csr; + + /* + * All V-related instructions, including CSR operations are 4-Byte. So, + * do not handle if the instruction length is not 4-Byte. + */ + if (unlikely(GET_INSN_LENGTH(insn_buf) != 4)) + return false; + + switch (opcode) { + case RVV_OPCODE_VECTOR: + return true; + case RVV_OPCODE_VL: + case RVV_OPCODE_VS: + width = RVV_EXRACT_VL_VS_WIDTH(insn_buf); + if (width == RVV_VL_VS_WIDTH_8 || width == RVV_VL_VS_WIDTH_16 || + width == RVV_VL_VS_WIDTH_32 || width == RVV_VL_VS_WIDTH_64) + return true; + + break; + case RVG_OPCODE_SYSTEM: + csr = RVG_EXTRACT_SYSTEM_CSR(insn_buf); + if ((csr >= CSR_VSTART && csr <= CSR_VCSR) || + (csr >= CSR_VL && csr <= CSR_VLENB)) + return true; + } + + return false; +} + +static int riscv_v_thread_zalloc(void) +{ + void *datap; + + datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + if (!datap) + return -ENOMEM; + + current->thread.vstate.datap = datap; + memset(¤t->thread.vstate, 0, offsetof(struct __riscv_v_ext_state, + datap)); + return 0; +} + +bool riscv_v_first_use_handler(struct pt_regs *regs) +{ + u32 __user *epc = (u32 __user *)regs->epc; + u32 insn = (u32)regs->badaddr; + + /* Do not handle if V is not supported, or disabled */ + if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + return false; + + /* If V has been enabled then it is not the first-use trap */ + if (riscv_v_vstate_query(regs)) + return false; + + /* Get the instruction */ + if (!insn) { + if (__get_user(insn, epc)) + return false; + } + + /* Filter out non-V instructions */ + if (!insn_is_vector(insn)) + return false; + + /* Sanity check. datap should be null by the time of the first-use trap */ + WARN_ON(current->thread.vstate.datap); + + /* + * Now we sure that this is a V instruction. And it executes in the + * context where VS has been off. So, try to allocate the user's V + * context and resume execution. + */ + if (riscv_v_thread_zalloc()) { + force_sig(SIGBUS); + return true; + } + riscv_v_vstate_on(regs); + return true; +} From 0c59922c769a1361d4699ef6694b59031767a74e Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:09 +0000 Subject: [PATCH 35/78] riscv: Add ptrace vector support This patch adds ptrace support for riscv vector. The vector registers will be saved in datap pointer of __riscv_v_ext_state. This pointer will be set right after the __riscv_v_ext_state data structure then it will be put in ubuf for ptrace system call to get or set. It will check if the datap got from ubuf is set to the correct address or not when the ptrace system call is trying to set the vector registers. Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230605110724.21391-13-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/ptrace.h | 7 +++ arch/riscv/kernel/ptrace.c | 70 ++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 3 files changed, 78 insertions(+) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 586786d023c4..e8d127ec5cf7 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -94,6 +94,13 @@ struct __riscv_v_ext_state { */ }; +/* + * According to spec: The number of bits in a single vector register, + * VLEN >= ELEN, which must be a power of 2, and must be no greater than + * 2^16 = 65536bits = 8192bytes + */ +#define RISCV_MAX_VLENB (8192) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_ASM_RISCV_PTRACE_H */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 23c48b14a0e7..1d572cf3140f 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -7,6 +7,7 @@ * Copied from arch/tile/kernel/ptrace.c */ +#include #include #include #include @@ -24,6 +25,9 @@ enum riscv_regset { #ifdef CONFIG_FPU REGSET_F, #endif +#ifdef CONFIG_RISCV_ISA_V + REGSET_V, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -80,6 +84,61 @@ static int riscv_fpr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_V +static int riscv_vr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct __riscv_v_ext_state *vstate = &target->thread.vstate; + + if (!riscv_v_vstate_query(task_pt_regs(target))) + return -EINVAL; + + /* + * Ensure the vector registers have been saved to the memory before + * copying them to membuf. + */ + if (target == current) + riscv_v_vstate_save(current, task_pt_regs(current)); + + /* Copy vector header from vstate. */ + membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap)); + membuf_zero(&to, sizeof(vstate->datap)); + + /* Copy all the vector registers from vstate. */ + return membuf_write(&to, vstate->datap, riscv_v_vsize); +} + +static int riscv_vr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret, size; + struct __riscv_v_ext_state *vstate = &target->thread.vstate; + + if (!riscv_v_vstate_query(task_pt_regs(target))) + return -EINVAL; + + /* Copy rest of the vstate except datap */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0, + offsetof(struct __riscv_v_ext_state, datap)); + if (unlikely(ret)) + return ret; + + /* Skip copy datap. */ + size = sizeof(vstate->datap); + count -= size; + ubuf += size; + + /* Copy all the vector registers. */ + pos = 0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap, + 0, riscv_v_vsize); + return ret; +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -99,6 +158,17 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_fpr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_V + [REGSET_V] = { + .core_note_type = NT_RISCV_VECTOR, + .align = 16, + .n = ((32 * RISCV_MAX_VLENB) + + sizeof(struct __riscv_v_ext_state)) / sizeof(__u32), + .size = sizeof(__u32), + .regset_get = riscv_vr_get, + .set = riscv_vr_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index ac3da855fb19..7d8d9ae36615 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -440,6 +440,7 @@ typedef struct elf64_shdr { #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_VECTOR 0x900 /* RISC-V vector registers */ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ #define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ From a45cedaa1ac0da7b30882afb42ff9d5285e9bb44 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:10 +0000 Subject: [PATCH 36/78] riscv: signal: check fp-reserved words unconditionally In order to let kernel/user locate and identify an extension context on the existing sigframe, we are going to utilize reserved space of fp and encode the information there. And since the sigcontext has already preserved a space for fp context w or w/o CONFIG_FPU, we move those reserved words checking/setting routine back into generic code. This commit also undone an additional logical change carried by the refactor commit 007f5c3589578 ("Refactor FPU code in signal setup/return procedures"). Originally we did not restore fp context if restoring of gpr have failed. And it was fine on the other side. In such way the kernel could keep the regfiles intact, and potentially react at the failing point of restore. Signed-off-by: Andy Chiu Acked-by: Conor Dooley Acked-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-14-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 55 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 9aff9d720590..6b4a5c90bd87 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -40,26 +40,13 @@ static long restore_fp_state(struct pt_regs *regs, { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; - size_t i; err = __copy_from_user(¤t->thread.fstate, state, sizeof(*state)); if (unlikely(err)) return err; fstate_restore(current, regs); - - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { - u32 value; - - err = __get_user(value, &sc_fpregs->q.reserved[i]); - if (unlikely(err)) - break; - if (value != 0) - return -EINVAL; - } - - return err; + return 0; } static long save_fp_state(struct pt_regs *regs, @@ -67,20 +54,9 @@ static long save_fp_state(struct pt_regs *regs, { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; - size_t i; fstate_save(current, regs); err = __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); - if (unlikely(err)) - return err; - - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { - err = __put_user(0, &sc_fpregs->q.reserved[i]); - if (unlikely(err)) - break; - } - return err; } #else @@ -92,11 +68,30 @@ static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { long err; + size_t i; + /* sc_regs is structured the same as the start of pt_regs */ err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs)); + if (unlikely(err)) + return err; + /* Restore the floating-point state. */ - if (has_fpu()) - err |= restore_fp_state(regs, &sc->sc_fpregs); + if (has_fpu()) { + err = restore_fp_state(regs, &sc->sc_fpregs); + if (unlikely(err)) + return err; + } + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) { + u32 value; + + err = __get_user(value, &sc->sc_fpregs.q.reserved[i]); + if (unlikely(err)) + break; + if (value != 0) + return -EINVAL; + } return err; } @@ -147,11 +142,17 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, { struct sigcontext __user *sc = &frame->uc.uc_mcontext; long err; + size_t i; + /* sc_regs is structured the same as the start of pt_regs */ err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs)); /* Save the floating-point state. */ if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) + err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]); + return err; } From 8ee0b41898fa26f66e32237f179b6989c65600d6 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:11 +0000 Subject: [PATCH 37/78] riscv: signal: Add sigcontext save/restore for vector This patch facilitates the existing fp-reserved words for placement of the first extension's context header on the user's sigframe. A context header consists of a distinct magic word and the size, including the header itself, of an extension on the stack. Then, the frame is followed by the context of that extension, and then a header + context body for another extension if exists. If there is no more extension to come, then the frame must be ended with a null context header. A special case is rv64gc, where the kernel support no extensions requiring to expose additional regfile to the user. In such case the kernel would place the null context header right after the first reserved word of __riscv_q_ext_state when saving sigframe. And the kernel would check if all reserved words are zeros when a signal handler returns. __riscv_q_ext_state---->| |<-__riscv_extra_ext_header ~ ~ .reserved[0]--->|0 |<- .reserved <-------|magic |<- .hdr | |size |_______ end of sc_fpregs | |ext-bdy| | ~ ~ +)size ------->|magic |<- another context header |size | |ext-bdy| ~ ~ |magic:0|<- null context header |size:0 | The vector registers will be saved in datap pointer. The datap pointer will be allocated dynamically when the task needs in kernel space. On the other hand, datap pointer on the sigframe will be set right after the __riscv_v_ext_state data structure. Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Suggested-by: Vineet Gupta Suggested-by: Richard Henderson Co-developed-by: Andy Chiu Signed-off-by: Andy Chiu Acked-by: Conor Dooley Acked-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-15-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/ptrace.h | 15 ++ arch/riscv/include/uapi/asm/sigcontext.h | 16 ++- arch/riscv/kernel/setup.c | 3 + arch/riscv/kernel/signal.c | 174 +++++++++++++++++++++-- 4 files changed, 193 insertions(+), 15 deletions(-) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index e8d127ec5cf7..e17c550986a6 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -71,6 +71,21 @@ struct __riscv_q_ext_state { __u32 reserved[3]; }; +struct __riscv_ctx_hdr { + __u32 magic; + __u32 size; +}; + +struct __riscv_extra_ext_header { + __u32 __padding[129] __attribute__((aligned(16))); + /* + * Reserved for expansion of sigcontext structure. Currently zeroed + * upon signal, and must be zero upon sigreturn. + */ + __u32 reserved; + struct __riscv_ctx_hdr hdr; +}; + union __riscv_fp_state { struct __riscv_f_ext_state f; struct __riscv_d_ext_state d; diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index 84f2dfcfdbce..8b8a8541673a 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -8,6 +8,17 @@ #include +/* The Magic number for signal context frame header. */ +#define RISCV_V_MAGIC 0x53465457 +#define END_MAGIC 0x0 + +/* The size of END signal context header. */ +#define END_HDR_SIZE 0x0 + +struct __sc_riscv_v_state { + struct __riscv_v_ext_state v_state; +} __attribute__((aligned(16))); + /* * Signal context structure * @@ -16,7 +27,10 @@ */ struct sigcontext { struct user_regs_struct sc_regs; - union __riscv_fp_state sc_fpregs; + union { + union __riscv_fp_state sc_fpregs; + struct __riscv_extra_ext_header sc_extdesc; + }; }; #endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 36b026057503..60ebe757ef20 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -262,6 +262,8 @@ static void __init parse_dtb(void) #endif } +extern void __init init_rt_signal_env(void); + void __init setup_arch(char **cmdline_p) { parse_dtb(); @@ -295,6 +297,7 @@ void __init setup_arch(char **cmdline_p) riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); + init_rt_signal_env(); apply_boot_alternatives(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 6b4a5c90bd87..c46f3dc039bb 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -19,10 +19,12 @@ #include #include #include +#include #include #include extern u32 __user_rt_sigreturn[2]; +static size_t riscv_v_sc_size __ro_after_init; #define DEBUG_SIG 0 @@ -64,12 +66,87 @@ static long save_fp_state(struct pt_regs *regs, #define restore_fp_state(task, regs) (0) #endif +#ifdef CONFIG_RISCV_ISA_V + +static long save_v_state(struct pt_regs *regs, void __user **sc_vec) +{ + struct __riscv_ctx_hdr __user *hdr; + struct __sc_riscv_v_state __user *state; + void __user *datap; + long err; + + hdr = *sc_vec; + /* Place state to the user's signal context space after the hdr */ + state = (struct __sc_riscv_v_state __user *)(hdr + 1); + /* Point datap right after the end of __sc_riscv_v_state */ + datap = state + 1; + + /* datap is designed to be 16 byte aligned for better performance */ + WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + + riscv_v_vstate_save(current, regs); + /* Copy everything of vstate but datap. */ + err = __copy_to_user(&state->v_state, ¤t->thread.vstate, + offsetof(struct __riscv_v_ext_state, datap)); + /* Copy the pointer datap itself. */ + err |= __put_user(datap, &state->v_state.datap); + /* Copy the whole vector content to user space datap. */ + err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize); + /* Copy magic to the user space after saving all vector conetext */ + err |= __put_user(RISCV_V_MAGIC, &hdr->magic); + err |= __put_user(riscv_v_sc_size, &hdr->size); + if (unlikely(err)) + return err; + + /* Only progress the sv_vec if everything has done successfully */ + *sc_vec += riscv_v_sc_size; + return 0; +} + +/* + * Restore Vector extension context from the user's signal frame. This function + * assumes a valid extension header. So magic and size checking must be done by + * the caller. + */ +static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) +{ + long err; + struct __sc_riscv_v_state __user *state = sc_vec; + void __user *datap; + + /* Copy everything of __sc_riscv_v_state except datap. */ + err = __copy_from_user(¤t->thread.vstate, &state->v_state, + offsetof(struct __riscv_v_ext_state, datap)); + if (unlikely(err)) + return err; + + /* Copy the pointer datap itself. */ + err = __get_user(datap, &state->v_state.datap); + if (unlikely(err)) + return err; + /* + * Copy the whole vector content from user space datap. Use + * copy_from_user to prevent information leak. + */ + err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); + if (unlikely(err)) + return err; + + riscv_v_vstate_restore(current, regs); + + return err; +} +#else +#define save_v_state(task, regs) (0) +#define __restore_v_state(task, regs) (0) +#endif + static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { + void __user *sc_ext_ptr = &sc->sc_extdesc.hdr; + __u32 rsvd; long err; - size_t i; - /* sc_regs is structured the same as the start of pt_regs */ err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs)); if (unlikely(err)) @@ -82,32 +159,81 @@ static long restore_sigcontext(struct pt_regs *regs, return err; } - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) { - u32 value; + /* Check the reserved word before extensions parsing */ + err = __get_user(rsvd, &sc->sc_extdesc.reserved); + if (unlikely(err)) + return err; + if (unlikely(rsvd)) + return -EINVAL; - err = __get_user(value, &sc->sc_fpregs.q.reserved[i]); + while (!err) { + __u32 magic, size; + struct __riscv_ctx_hdr __user *head = sc_ext_ptr; + + err |= __get_user(magic, &head->magic); + err |= __get_user(size, &head->size); if (unlikely(err)) + return err; + + sc_ext_ptr += sizeof(*head); + switch (magic) { + case END_MAGIC: + if (size != END_HDR_SIZE) + return -EINVAL; + + return 0; + case RISCV_V_MAGIC: + if (!has_vector() || !riscv_v_vstate_query(regs) || + size != riscv_v_sc_size) + return -EINVAL; + + err = __restore_v_state(regs, sc_ext_ptr); break; - if (value != 0) + default: return -EINVAL; + } + sc_ext_ptr = (void __user *)head + size; } return err; } +static size_t get_rt_frame_size(void) +{ + struct rt_sigframe __user *frame; + size_t frame_size; + size_t total_context_size = 0; + + frame_size = sizeof(*frame); + + if (has_vector() && riscv_v_vstate_query(task_pt_regs(current))) + total_context_size += riscv_v_sc_size; + /* + * Preserved a __riscv_ctx_hdr for END signal context header if an + * extension uses __riscv_extra_ext_header + */ + if (total_context_size) + total_context_size += sizeof(struct __riscv_ctx_hdr); + + frame_size += total_context_size; + + frame_size = round_up(frame_size, 16); + return frame_size; +} + SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; struct task_struct *task; sigset_t set; + size_t frame_size = get_rt_frame_size(); /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; frame = (struct rt_sigframe __user *)regs->sp; - if (!access_ok(frame, sizeof(*frame))) + if (!access_ok(frame, frame_size)) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) @@ -141,17 +267,22 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs) { struct sigcontext __user *sc = &frame->uc.uc_mcontext; + struct __riscv_ctx_hdr __user *sc_ext_ptr = &sc->sc_extdesc.hdr; long err; - size_t i; /* sc_regs is structured the same as the start of pt_regs */ err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs)); /* Save the floating-point state. */ if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) - err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]); + /* Save the vector state. */ + if (has_vector() && riscv_v_vstate_query(regs)) + err |= save_v_state(regs, (void __user **)&sc_ext_ptr); + /* Write zero to fp-reserved space and check it on restore_sigcontext */ + err |= __put_user(0, &sc->sc_extdesc.reserved); + /* And put END __riscv_ctx_hdr at the end. */ + err |= __put_user(END_MAGIC, &sc_ext_ptr->magic); + err |= __put_user(END_HDR_SIZE, &sc_ext_ptr->size); return err; } @@ -176,6 +307,13 @@ static inline void __user *get_sigframe(struct ksignal *ksig, /* Align the stack frame. */ sp &= ~0xfUL; + /* + * Fail if the size of the altstack is not large enough for the + * sigframe construction. + */ + if (current->sas_ss_size && sp < current->sas_ss_sp) + return (void __user __force *)-1UL; + return (void __user *)sp; } @@ -185,9 +323,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct rt_sigframe __user *frame; long err = 0; unsigned long __maybe_unused addr; + size_t frame_size = get_rt_frame_size(); - frame = get_sigframe(ksig, regs, sizeof(*frame)); - if (!access_ok(frame, sizeof(*frame))) + frame = get_sigframe(ksig, regs, frame_size); + if (!access_ok(frame, frame_size)) return -EFAULT; err |= copy_siginfo_to_user(&frame->info, &ksig->info); @@ -320,3 +459,10 @@ void arch_do_signal_or_restart(struct pt_regs *regs) */ restore_saved_sigmask(); } + +void init_rt_signal_env(void); +void __init init_rt_signal_env(void) +{ + riscv_v_sc_size = sizeof(struct __riscv_ctx_hdr) + + sizeof(struct __sc_riscv_v_state) + riscv_v_vsize; +} From e92f469b0771e6db9688a58c0e34a8342da6a6bc Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 5 Jun 2023 11:07:12 +0000 Subject: [PATCH 38/78] riscv: signal: Report signal frame size to userspace via auxv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vector register belongs to the signal context. They need to be stored and restored as entering and leaving the signal handler. According to the V-extension specification, the maximum length of the vector registers can be 2^16. Hence, if userspace refers to the MINSIGSTKSZ to create a sigframe, it may not be enough. To resolve this problem, this patch refers to the commit 94b07c1f8c39c ("arm64: signal: Report signal frame size to userspace via auxv") to enable userspace to know the minimum required sigframe size through the auxiliary vector and use it to allocate enough memory for signal context. Note that auxv always reports size of the sigframe as if V exists for all starting processes, whenever the kernel has CONFIG_RISCV_ISA_V. The reason is that users usually reference this value to allocate an alternative signal stack, and the user may use V anytime. So the user must reserve a space for V-context in sigframe in case that the signal handler invokes after the kernel allocating V. Signed-off-by: Greentime Hu Signed-off-by: Vincent Chen Signed-off-by: Andy Chiu Acked-by: Conor Dooley Reviewed-by: Björn Töpel Reviewed-by: Guo Ren Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-16-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/elf.h | 9 +++++++++ arch/riscv/include/asm/processor.h | 2 ++ arch/riscv/include/uapi/asm/auxvec.h | 1 + arch/riscv/kernel/signal.c | 20 +++++++++++++++----- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 30e7d2455960..ca23c4f6c440 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -105,6 +105,15 @@ do { \ get_cache_size(3, CACHE_TYPE_UNIFIED)); \ NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, \ get_cache_geometry(3, CACHE_TYPE_UNIFIED)); \ + /* \ + * Should always be nonzero unless there's a kernel bug. \ + * If we haven't determined a sensible value to give to \ + * userspace, omit the entry: \ + */ \ + if (likely(signal_minsigstksz)) \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \ + else \ + NEW_AUX_ENT(AT_IGNORE, 0); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f0ddf691ac5e..38ded8c5f207 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -7,6 +7,7 @@ #define _ASM_RISCV_PROCESSOR_H #include +#include #include @@ -81,6 +82,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid); extern void riscv_fill_hwcap(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +extern unsigned long signal_minsigstksz __ro_after_init; #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index fb187a33ce58..10aaa83db89e 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -35,5 +35,6 @@ /* entries in ARCH_DLINFO */ #define AT_VECTOR_SIZE_ARCH 9 +#define AT_MINSIGSTKSZ 51 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c46f3dc039bb..f117641c1c49 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -23,6 +23,8 @@ #include #include +unsigned long signal_minsigstksz __ro_after_init; + extern u32 __user_rt_sigreturn[2]; static size_t riscv_v_sc_size __ro_after_init; @@ -197,7 +199,7 @@ static long restore_sigcontext(struct pt_regs *regs, return err; } -static size_t get_rt_frame_size(void) +static size_t get_rt_frame_size(bool cal_all) { struct rt_sigframe __user *frame; size_t frame_size; @@ -205,8 +207,10 @@ static size_t get_rt_frame_size(void) frame_size = sizeof(*frame); - if (has_vector() && riscv_v_vstate_query(task_pt_regs(current))) - total_context_size += riscv_v_sc_size; + if (has_vector()) { + if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) + total_context_size += riscv_v_sc_size; + } /* * Preserved a __riscv_ctx_hdr for END signal context header if an * extension uses __riscv_extra_ext_header @@ -226,7 +230,7 @@ SYSCALL_DEFINE0(rt_sigreturn) struct rt_sigframe __user *frame; struct task_struct *task; sigset_t set; - size_t frame_size = get_rt_frame_size(); + size_t frame_size = get_rt_frame_size(false); /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -323,7 +327,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct rt_sigframe __user *frame; long err = 0; unsigned long __maybe_unused addr; - size_t frame_size = get_rt_frame_size(); + size_t frame_size = get_rt_frame_size(false); frame = get_sigframe(ksig, regs, frame_size); if (!access_ok(frame, frame_size)) @@ -465,4 +469,10 @@ void __init init_rt_signal_env(void) { riscv_v_sc_size = sizeof(struct __riscv_ctx_hdr) + sizeof(struct __sc_riscv_v_state) + riscv_v_vsize; + /* + * Determine the stack space required for guaranteed signal delivery. + * The signal_minsigstksz will be populated into the AT_MINSIGSTKSZ entry + * in the auxiliary array at process startup. + */ + signal_minsigstksz = get_rt_frame_size(true); } From 76e22fdc2c2658ab595cdda7368d43d2dc16f3f4 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:13 +0000 Subject: [PATCH 39/78] riscv: signal: validate altstack to reflect Vector Some extensions, such as Vector, dynamically change footprint on a signal frame, so MINSIGSTKSZ is no longer accurate. For example, an RV64V implementation with vlen = 512 may occupy 2K + 40 + 12 Bytes of a signal frame with the upcoming support. And processes that do not execute any vector instructions do not need to reserve the extra sigframe. So we need a way to guard the allocation size of the sigframe at process runtime according to current status of V. Thus, provide the function sigaltstack_size_valid() to validate its size based on current allocation status of supported extensions. Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-17-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index f117641c1c49..180d951d3624 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -476,3 +476,10 @@ void __init init_rt_signal_env(void) */ signal_minsigstksz = get_rt_frame_size(true); } + +#ifdef CONFIG_DYNAMIC_SIGFRAME +bool sigaltstack_size_valid(size_t ss_size) +{ + return ss_size > get_rt_frame_size(false); +} +#endif /* CONFIG_DYNAMIC_SIGFRAME */ From c7cdd96eca2810f5b69c37eb439ec63d59fa1b83 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 5 Jun 2023 11:07:14 +0000 Subject: [PATCH 40/78] riscv: prevent stack corruption by reserving task_pt_regs(p) early Early function calls, such as setup_vm(), relocate_enable_mmu(), soc_early_init() etc, are free to operate on stack. However, PT_SIZE_ON_STACK bytes at the head of the kernel stack are purposedly reserved for the placement of per-task register context pointed by task_pt_regs(p). Those functions may corrupt task_pt_regs if we overlap the $sp with it. In fact, we had accidentally corrupted sstatus.VS in some tests, treating the kernel to save V context before V was actually allocated, resulting in a kernel panic. Thus, we should skip PT_SIZE_ON_STACK for $sp before making C function calls from the top-level assembly. Co-developed-by: ShihPo Hung Signed-off-by: ShihPo Hung Co-developed-by: Vincent Chen Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-18-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index e16bb2185d55..11c3b94c4534 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -301,6 +301,7 @@ clear_bss_done: la tp, init_task la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp + addi sp, sp, -PT_SIZE_ON_STACK #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start XIP_FIXUP_OFFSET a0 @@ -318,6 +319,7 @@ clear_bss_done: /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE + addi sp, sp, -PT_SIZE_ON_STACK #ifdef CONFIG_KASAN call kasan_early_init From bf78f1ea6e5108a7ebd55be0853f0716433117a9 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 5 Jun 2023 11:07:15 +0000 Subject: [PATCH 41/78] riscv: kvm: Add V extension to KVM ISA Add V extension to KVM isa extension list to enable supporting of V extension on VCPUs. Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Anup Patel Acked-by: Anup Patel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-19-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/vcpu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index f92790c9481a..8feb57c4c2e8 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -121,6 +121,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZICBOZ, KVM_RISCV_ISA_EXT_ZBB, KVM_RISCV_ISA_EXT_SSAIA, + KVM_RISCV_ISA_EXT_V, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8bd9f2a8a0b9..f3282ff371ca 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -57,6 +57,7 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSTC), From 0f4b82579716b12bb88257bd7ea80f25c791fb2c Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 5 Jun 2023 11:07:16 +0000 Subject: [PATCH 42/78] riscv: KVM: Add vector lazy save/restore support This patch adds vector context save/restore for guest VCPUs. To reduce the impact on KVM performance, the implementation imitates the FP context switch mechanism to lazily store and restore the vector context only when the kernel enters/exits the in-kernel run loop and not during the KVM world switch. Signed-off-by: Vincent Chen Signed-off-by: Greentime Hu Signed-off-by: Andy Chiu Reviewed-by: Anup Patel Acked-by: Anup Patel Link: https://lore.kernel.org/r/20230605110724.21391-20-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kvm_host.h | 2 + arch/riscv/include/asm/kvm_vcpu_vector.h | 82 ++++++++++ arch/riscv/include/uapi/asm/kvm.h | 7 + arch/riscv/kvm/Makefile | 1 + arch/riscv/kvm/vcpu.c | 22 +++ arch/riscv/kvm/vcpu_vector.c | 186 +++++++++++++++++++++++ 6 files changed, 300 insertions(+) create mode 100644 arch/riscv/include/asm/kvm_vcpu_vector.h create mode 100644 arch/riscv/kvm/vcpu_vector.c diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index ee0acccb1d3b..bd47a1dc2ff8 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,7 @@ struct kvm_cpu_context { unsigned long sstatus; unsigned long hstatus; union __riscv_fp_state fp; + struct __riscv_v_ext_state vector; }; struct kvm_vcpu_csr { diff --git a/arch/riscv/include/asm/kvm_vcpu_vector.h b/arch/riscv/include/asm/kvm_vcpu_vector.h new file mode 100644 index 000000000000..ff994fdd6d0d --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_vector.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 SiFive + * + * Authors: + * Vincent Chen + * Greentime Hu + */ + +#ifndef __KVM_VCPU_RISCV_VECTOR_H +#define __KVM_VCPU_RISCV_VECTOR_H + +#include + +#ifdef CONFIG_RISCV_ISA_V +#include +#include + +static __always_inline void __kvm_riscv_vector_save(struct kvm_cpu_context *context) +{ + __riscv_v_vstate_save(&context->vector, context->vector.datap); +} + +static __always_inline void __kvm_riscv_vector_restore(struct kvm_cpu_context *context) +{ + __riscv_v_vstate_restore(&context->vector, context->vector.datap); +} + +void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx, + unsigned long *isa); +void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx, + unsigned long *isa); +void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx); +void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx); +int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, + struct kvm_cpu_context *cntx); +void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu); +#else + +struct kvm_cpu_context; + +static inline void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx, + unsigned long *isa) +{ +} + +static inline void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx, + unsigned long *isa) +{ +} + +static inline void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx) +{ +} + +static inline void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx) +{ +} + +static inline int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, + struct kvm_cpu_context *cntx) +{ + return 0; +} + +static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) +{ +} +#endif + +int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype); +int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype); +#endif diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 8feb57c4c2e8..855c047e86d4 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -204,6 +204,13 @@ enum KVM_RISCV_SBI_EXT_ID { #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) +/* V extension registers are mapped as type 9 */ +#define KVM_REG_RISCV_VECTOR (0x09 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_VECTOR_CSR_REG(name) \ + (offsetof(struct __riscv_v_ext_state, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_VECTOR_REG(n) \ + ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long)) + #endif #endif /* __LINUX_KVM_RISCV_H */ diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 8031b8912a0d..7b4c21f9aa6a 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -17,6 +17,7 @@ kvm-y += mmu.o kvm-y += vcpu.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o +kvm-y += vcpu_vector.o kvm-y += vcpu_insn.o kvm-y += vcpu_switch.o kvm-y += vcpu_sbi.o diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index f3282ff371ca..e5e045852e6a 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), @@ -139,6 +141,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_fp_reset(vcpu); + kvm_riscv_vcpu_vector_reset(vcpu); + kvm_riscv_vcpu_timer_reset(vcpu); kvm_riscv_vcpu_aia_reset(vcpu); @@ -199,6 +203,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) + return -ENOMEM; + /* By default, make CY, TM, and IR counters accessible in VU mode */ reset_csr->scounteren = 0x7; @@ -242,6 +249,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Free unused pages pre-allocated for G-stage page table mappings */ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + + /* Free vector context space for host and guest kernel */ + kvm_riscv_vcpu_free_vector_context(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -680,6 +690,9 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_set_reg_vector(vcpu, reg, + KVM_REG_RISCV_VECTOR); default: break; } @@ -709,6 +722,9 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_get_reg_vector(vcpu, reg, + KVM_REG_RISCV_VECTOR); default: break; } @@ -1003,6 +1019,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context); kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, vcpu->arch.isa); + kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context); + kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context, + vcpu->arch.isa); kvm_riscv_vcpu_aia_load(vcpu, cpu); @@ -1022,6 +1041,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); kvm_riscv_vcpu_timer_save(vcpu); + kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context, + vcpu->arch.isa); + kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context); csr->vsstatus = csr_read(CSR_VSSTATUS); csr->vsie = csr_read(CSR_VSIE); diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c new file mode 100644 index 000000000000..edd2eecbddc2 --- /dev/null +++ b/arch/riscv/kvm/vcpu_vector.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 SiFive + * + * Authors: + * Vincent Chen + * Greentime Hu + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_RISCV_ISA_V +void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu) +{ + unsigned long *isa = vcpu->arch.isa; + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + + cntx->sstatus &= ~SR_VS; + if (riscv_isa_extension_available(isa, v)) { + cntx->sstatus |= SR_VS_INITIAL; + WARN_ON(!cntx->vector.datap); + memset(cntx->vector.datap, 0, riscv_v_vsize); + } else { + cntx->sstatus |= SR_VS_OFF; + } +} + +static void kvm_riscv_vcpu_vector_clean(struct kvm_cpu_context *cntx) +{ + cntx->sstatus &= ~SR_VS; + cntx->sstatus |= SR_VS_CLEAN; +} + +void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx, + unsigned long *isa) +{ + if ((cntx->sstatus & SR_VS) == SR_VS_DIRTY) { + if (riscv_isa_extension_available(isa, v)) + __kvm_riscv_vector_save(cntx); + kvm_riscv_vcpu_vector_clean(cntx); + } +} + +void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx, + unsigned long *isa) +{ + if ((cntx->sstatus & SR_VS) != SR_VS_OFF) { + if (riscv_isa_extension_available(isa, v)) + __kvm_riscv_vector_restore(cntx); + kvm_riscv_vcpu_vector_clean(cntx); + } +} + +void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx) +{ + /* No need to check host sstatus as it can be modified outside */ + if (riscv_isa_extension_available(NULL, v)) + __kvm_riscv_vector_save(cntx); +} + +void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx) +{ + if (riscv_isa_extension_available(NULL, v)) + __kvm_riscv_vector_restore(cntx); +} + +int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, + struct kvm_cpu_context *cntx) +{ + cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL); + if (!cntx->vector.datap) + return -ENOMEM; + + vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + if (!vcpu->arch.host_context.vector.datap) + return -ENOMEM; + + return 0; +} + +void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) +{ + kfree(vcpu->arch.guest_reset_context.vector.datap); + kfree(vcpu->arch.host_context.vector.datap); +} +#endif + +static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + size_t reg_size) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + void *reg_val; + size_t vlenb = riscv_v_vsize / 32; + + if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) { + if (reg_size != sizeof(unsigned long)) + return NULL; + switch (reg_num) { + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + reg_val = &cntx->vector.vstart; + break; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + reg_val = &cntx->vector.vl; + break; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + reg_val = &cntx->vector.vtype; + break; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + reg_val = &cntx->vector.vcsr; + break; + case KVM_REG_RISCV_VECTOR_CSR_REG(datap): + default: + return NULL; + } + } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) { + if (reg_size != vlenb) + return NULL; + reg_val = cntx->vector.datap + + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb; + } else { + return NULL; + } + + return reg_val; +} + +int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype) +{ + unsigned long *isa = vcpu->arch.isa; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + rtype); + void *reg_val = NULL; + size_t reg_size = KVM_REG_SIZE(reg->id); + + if (rtype == KVM_REG_RISCV_VECTOR && + riscv_isa_extension_available(isa, v)) { + reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size); + } + + if (!reg_val) + return -EINVAL; + + if (copy_to_user(uaddr, reg_val, reg_size)) + return -EFAULT; + + return 0; +} + +int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype) +{ + unsigned long *isa = vcpu->arch.isa; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + rtype); + void *reg_val = NULL; + size_t reg_size = KVM_REG_SIZE(reg->id); + + if (rtype == KVM_REG_RISCV_VECTOR && + riscv_isa_extension_available(isa, v)) { + reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size); + } + + if (!reg_val) + return -EINVAL; + + if (copy_from_user(reg_val, uaddr, reg_size)) + return -EFAULT; + + return 0; +} From 50724efcb370c61c64f75614763fb411e087f70c Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:17 +0000 Subject: [PATCH 43/78] riscv: hwcap: change ELF_HWCAP to a function Using a function is flexible to represent ELF_HWCAP. So the kernel may encode hwcap reflecting supported hardware features just at the moment of the start of each program. This will be helpful when we introduce prctl/sysctl interface to control per-process availability of Vector extension in following patches. Programs started with V disabled should see V masked off in theirs ELF_HWCAP. Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230605110724.21391-21-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/kernel/cpufeature.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index ca23c4f6c440..c24280774caf 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -66,7 +66,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); * via a bitmap that coorespends to each single-letter ISA extension. This is * essentially defunct, but will remain for compatibility with userspace. */ -#define ELF_HWCAP (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1)) +#define ELF_HWCAP riscv_get_elf_hwcap() extern unsigned long elf_hwcap; /* diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 574385930ba7..e6c288ac4581 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -61,6 +61,8 @@ #include +unsigned long riscv_get_elf_hwcap(void); + struct riscv_isa_ext_data { /* Name of the extension displayed to userspace via /proc/cpuinfo */ char uprop[RISCV_ISA_EXT_NAME_LEN_MAX]; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 28032b083463..29c0680652a0 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -293,6 +293,11 @@ void __init riscv_fill_hwcap(void) pr_info("riscv: ELF capabilities %s\n", print_str); } +unsigned long riscv_get_elf_hwcap(void) +{ + return (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1)); +} + #ifdef CONFIG_RISCV_ALTERNATIVE /* * Alternative patch sites consider 48 bits when determining when to patch From 1fd96a3e9d5d4febe1a8486590ad52c048d1be77 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:18 +0000 Subject: [PATCH 44/78] riscv: Add prctl controls for userspace vector management This patch add two riscv-specific prctls, to allow usespace control the use of vector unit: * PR_RISCV_V_SET_CONTROL: control the permission to use Vector at next, or all following execve for a thread. Turning off a thread's Vector live is not possible since libraries may have registered ifunc that may execute Vector instructions. * PR_RISCV_V_GET_CONTROL: get the same permission setting for the current thread, and the setting for following execve(s). Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Vincent Chen Link: https://lore.kernel.org/r/20230605110724.21391-22-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 10 +++ arch/riscv/include/asm/vector.h | 4 + arch/riscv/kernel/cpufeature.c | 9 ++- arch/riscv/kernel/process.c | 1 + arch/riscv/kernel/vector.c | 114 +++++++++++++++++++++++++++++ arch/riscv/kvm/vcpu.c | 2 + include/uapi/linux/prctl.h | 11 +++ kernel/sys.c | 12 +++ 8 files changed, 162 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 38ded8c5f207..e82af1097e26 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -40,6 +40,7 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; + unsigned long vstate_ctrl; struct __riscv_v_ext_state vstate; }; @@ -83,6 +84,15 @@ extern void riscv_fill_hwcap(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern unsigned long signal_minsigstksz __ro_after_init; + +#ifdef CONFIG_RISCV_ISA_V +/* Userspace interface for PR_RISCV_V_{SET,GET}_VS prctl()s: */ +#define RISCV_V_SET_CONTROL(arg) riscv_v_vstate_ctrl_set_current(arg) +#define RISCV_V_GET_CONTROL() riscv_v_vstate_ctrl_get_current() +extern long riscv_v_vstate_ctrl_set_current(unsigned long arg); +extern long riscv_v_vstate_ctrl_get_current(void); +#endif /* CONFIG_RISCV_ISA_V */ + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 8e56da67b5cf..04c0b07bf6cd 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -160,6 +160,9 @@ static inline void __switch_to_vector(struct task_struct *prev, riscv_v_vstate_restore(next, task_pt_regs(next)); } +void riscv_v_vstate_ctrl_init(struct task_struct *tsk); +bool riscv_v_vstate_ctrl_user_allowed(void); + #else /* ! CONFIG_RISCV_ISA_V */ struct pt_regs; @@ -168,6 +171,7 @@ static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } +static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_save(task, regs) do {} while (0) #define riscv_v_vstate_restore(task, regs) do {} while (0) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 29c0680652a0..8ae43e40fffc 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -295,7 +295,14 @@ void __init riscv_fill_hwcap(void) unsigned long riscv_get_elf_hwcap(void) { - return (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1)); + unsigned long hwcap; + + hwcap = (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1)); + + if (!riscv_v_vstate_ctrl_user_allowed()) + hwcap &= ~COMPAT_HWCAP_ISA_V; + + return hwcap; } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 78eb5ac45888..e32d737e039f 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -149,6 +149,7 @@ void flush_thread(void) #endif #ifdef CONFIG_RISCV_ISA_V /* Reset vector state */ + riscv_v_vstate_ctrl_init(current); riscv_v_vstate_off(task_pt_regs(current)); kfree(current->thread.vstate.datap); memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 9d81d1b2a7f3..a7dec9230164 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,8 @@ #include #include +static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); + unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -91,6 +94,43 @@ static int riscv_v_thread_zalloc(void) return 0; } +#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) +#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2) +#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) +#define VSTATE_CTRL_GET_INHERIT(x) (!!((x) & PR_RISCV_V_VSTATE_CTRL_INHERIT)) +static inline int riscv_v_ctrl_get_cur(struct task_struct *tsk) +{ + return VSTATE_CTRL_GET_CUR(tsk->thread.vstate_ctrl); +} + +static inline int riscv_v_ctrl_get_next(struct task_struct *tsk) +{ + return VSTATE_CTRL_GET_NEXT(tsk->thread.vstate_ctrl); +} + +static inline bool riscv_v_ctrl_test_inherit(struct task_struct *tsk) +{ + return VSTATE_CTRL_GET_INHERIT(tsk->thread.vstate_ctrl); +} + +static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt, + bool inherit) +{ + unsigned long ctrl; + + ctrl = cur & PR_RISCV_V_VSTATE_CTRL_CUR_MASK; + ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt); + if (inherit) + ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + tsk->thread.vstate_ctrl = ctrl; +} + +bool riscv_v_vstate_ctrl_user_allowed(void) +{ + return riscv_v_ctrl_get_cur(current) == PR_RISCV_V_VSTATE_CTRL_ON; +} +EXPORT_SYMBOL_GPL(riscv_v_vstate_ctrl_user_allowed); + bool riscv_v_first_use_handler(struct pt_regs *regs) { u32 __user *epc = (u32 __user *)regs->epc; @@ -129,3 +169,77 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) riscv_v_vstate_on(regs); return true; } + +void riscv_v_vstate_ctrl_init(struct task_struct *tsk) +{ + bool inherit; + int cur, next; + + if (!has_vector()) + return; + + next = riscv_v_ctrl_get_next(tsk); + if (!next) { + if (riscv_v_implicit_uacc) + cur = PR_RISCV_V_VSTATE_CTRL_ON; + else + cur = PR_RISCV_V_VSTATE_CTRL_OFF; + } else { + cur = next; + } + /* Clear next mask if inherit-bit is not set */ + inherit = riscv_v_ctrl_test_inherit(tsk); + if (!inherit) + next = PR_RISCV_V_VSTATE_CTRL_DEFAULT; + + riscv_v_ctrl_set(tsk, cur, next, inherit); +} + +long riscv_v_vstate_ctrl_get_current(void) +{ + if (!has_vector()) + return -EINVAL; + + return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; +} + +long riscv_v_vstate_ctrl_set_current(unsigned long arg) +{ + bool inherit; + int cur, next; + + if (!has_vector()) + return -EINVAL; + + if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) + return -EINVAL; + + cur = VSTATE_CTRL_GET_CUR(arg); + switch (cur) { + case PR_RISCV_V_VSTATE_CTRL_OFF: + /* Do not allow user to turn off V if current is not off */ + if (riscv_v_ctrl_get_cur(current) != PR_RISCV_V_VSTATE_CTRL_OFF) + return -EPERM; + + break; + case PR_RISCV_V_VSTATE_CTRL_ON: + break; + case PR_RISCV_V_VSTATE_CTRL_DEFAULT: + cur = riscv_v_ctrl_get_cur(current); + break; + default: + return -EINVAL; + } + + next = VSTATE_CTRL_GET_NEXT(arg); + inherit = VSTATE_CTRL_GET_INHERIT(arg); + switch (next) { + case PR_RISCV_V_VSTATE_CTRL_DEFAULT: + case PR_RISCV_V_VSTATE_CTRL_OFF: + case PR_RISCV_V_VSTATE_CTRL_ON: + riscv_v_ctrl_set(current, cur, next, inherit); + return 0; + } + + return -EINVAL; +} diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e5e045852e6a..de24127e7e93 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -88,6 +88,8 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_V: + return riscv_v_vstate_ctrl_user_allowed(); default: break; } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index f23d9a16507f..3c36aeade991 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -294,4 +294,15 @@ struct prctl_mm_map { #define PR_SET_MEMORY_MERGE 67 #define PR_GET_MEMORY_MERGE 68 + +#define PR_RISCV_V_SET_CONTROL 69 +#define PR_RISCV_V_GET_CONTROL 70 +# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0 +# define PR_RISCV_V_VSTATE_CTRL_OFF 1 +# define PR_RISCV_V_VSTATE_CTRL_ON 2 +# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4) +# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3 +# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc +# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 339fee3eff6a..05f838929e72 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -140,6 +140,12 @@ #ifndef GET_TAGGED_ADDR_CTRL # define GET_TAGGED_ADDR_CTRL() (-EINVAL) #endif +#ifndef RISCV_V_SET_CONTROL +# define RISCV_V_SET_CONTROL(a) (-EINVAL) +#endif +#ifndef RISCV_V_GET_CONTROL +# define RISCV_V_GET_CONTROL() (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2708,6 +2714,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags); break; #endif + case PR_RISCV_V_SET_CONTROL: + error = RISCV_V_SET_CONTROL(arg2); + break; + case PR_RISCV_V_GET_CONTROL: + error = RISCV_V_GET_CONTROL(); + break; default: error = -EINVAL; break; From 7ca7a7b9b635dbf8428f8e3bb8ea9e9ff5c79bfc Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:19 +0000 Subject: [PATCH 45/78] riscv: Add sysctl to set the default vector rule for new processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support Vector extension, the series exports variable-length vector registers on the signal frame. However, this potentially breaks abi if processing vector registers is required in the signal handler for old binaries. For example, there is such need if user-level context switch is triggerred via signals[1]. For this reason, it is best to leave a decision to distro maintainers, where the enablement of userspace Vector for new launching programs can be controlled. Developers may also need the switch to experiment with. The parameter is configurable through sysctl interface so a distro may turn off Vector early at init script if the break really happens in the wild. The switch will only take effects on new execve() calls once set. This will not effect existing processes that do not call execve(), nor processes which has been set with a non-default vstate_ctrl by making explicit PR_RISCV_V_SET_CONTROL prctl() calls. Link: https://lore.kernel.org/all/87cz4048rp.fsf@all.your.base.are.belong.to.us/ Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Vincent Chen Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230605110724.21391-23-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vector.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index a7dec9230164..f9c8e19ab301 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -180,7 +180,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) next = riscv_v_ctrl_get_next(tsk); if (!next) { - if (riscv_v_implicit_uacc) + if (READ_ONCE(riscv_v_implicit_uacc)) cur = PR_RISCV_V_VSTATE_CTRL_ON; else cur = PR_RISCV_V_VSTATE_CTRL_OFF; @@ -243,3 +243,34 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) return -EINVAL; } + +#ifdef CONFIG_SYSCTL + +static struct ctl_table riscv_v_default_vstate_table[] = { + { + .procname = "riscv_v_default_allow", + .data = &riscv_v_implicit_uacc, + .maxlen = sizeof(riscv_v_implicit_uacc), + .mode = 0644, + .proc_handler = proc_dobool, + }, + { } +}; + +static int __init riscv_v_sysctl_init(void) +{ + if (has_vector()) + if (!register_sysctl("abi", riscv_v_default_vstate_table)) + return -EINVAL; + return 0; +} + +#else /* ! CONFIG_SYSCTL */ +static int __init riscv_v_sysctl_init(void) { return 0; } +#endif /* ! CONFIG_SYSCTL */ + +static int riscv_v_init(void) +{ + return riscv_v_sysctl_init(); +} +core_initcall(riscv_v_init); From e4bb020f3dbb83912eb6799a9d4bb79da4fd77ec Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:20 +0000 Subject: [PATCH 46/78] riscv: detect assembler support for .option arch Some extensions use .option arch directive to selectively enable certain extensions in parts of its assembly code. For example, Zbb uses it to inform assmebler to emit bit manipulation instructions. However, supporting of this directive only exist on GNU assembler and has not landed on clang at the moment, making TOOLCHAIN_HAS_ZBB depend on AS_IS_GNU. While it is still under review at https://reviews.llvm.org/D123515, the upcoming Vector patch also requires this feature in assembler. Thus, provide Kconfig AS_HAS_OPTION_ARCH to detect such feature. Then TOOLCHAIN_HAS_XXX will be turned on automatically when the feature land. Suggested-by: Nathan Chancellor Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Reviewed-by: Nathan Chancellor Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230605110724.21391-24-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..1019b519d590 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -262,6 +262,12 @@ config RISCV_DMA_NONCOHERENT config AS_HAS_INSN def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) +config AS_HAS_OPTION_ARCH + # https://reviews.llvm.org/D123515 + def_bool y + depends on $(as-instr, .option arch$(comma) +m) + depends on !$(as-instr, .option arch$(comma) -i) + source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.errata" @@ -466,7 +472,7 @@ config TOOLCHAIN_HAS_ZBB depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb) depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb) depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 - depends on AS_IS_GNU + depends on AS_HAS_OPTION_ARCH config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" From fa8e7cce55da3569259dc270801885c420eb50fe Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 5 Jun 2023 11:07:21 +0000 Subject: [PATCH 47/78] riscv: Enable Vector code to be built This patch adds configs for building Vector code. First it detects the reqired toolchain support for building the code. Then it provides an option setting whether Vector is implicitly enabled to userspace. Signed-off-by: Guo Ren Co-developed-by: Greentime Hu Signed-off-by: Greentime Hu Co-developed-by: Andy Chiu Signed-off-by: Andy Chiu Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230605110724.21391-25-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 31 +++++++++++++++++++++++++++++++ arch/riscv/Makefile | 6 +++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1019b519d590..f3ba0a8b085e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -466,6 +466,37 @@ config RISCV_ISA_SVPBMT If you don't know what to do here, say Y. +config TOOLCHAIN_HAS_V + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64iv) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32iv) + depends on LLD_VERSION >= 140000 || LD_VERSION >= 23800 + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_V + bool "VECTOR extension support" + depends on TOOLCHAIN_HAS_V + depends on FPU + select DYNAMIC_SIGFRAME + default y + help + Say N here if you want to disable all vector related procedure + in the kernel. + + If you don't know what to do here, say Y. + +config RISCV_ISA_V_DEFAULT_ENABLE + bool "Enable userspace Vector by default" + depends on RISCV_ISA_V + default y + help + Say Y here if you want to enable Vector in userspace by default. + Otherwise, userspace has to make explicit prctl() call to enable + Vector, or enable it via the sysctl interface. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZBB bool default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0fb256bf8270..6ec6d52a4180 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -60,6 +60,7 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c +riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC KBUILD_CFLAGS += -Wa,-misa-spec=2.2 @@ -71,7 +72,10 @@ endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause -KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) +# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by +# matching non-v and non-multi-letter extensions out with the filter ([^v_]*) +KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') + KBUILD_AFLAGS += -march=$(riscv-march-y) KBUILD_CFLAGS += -mno-save-restore From 04a4722eeede8f83ce3fefc67b891bd8e132784a Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:22 +0000 Subject: [PATCH 48/78] riscv: Add documentation for Vector This patch add a brief documentation of the userspace interface in regard to the RISC-V Vector extension. Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Vincent Chen Co-developed-by: Bagas Sanjaya Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20230605110724.21391-26-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/index.rst | 1 + Documentation/riscv/vector.rst | 132 +++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 Documentation/riscv/vector.rst diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst index 175a91db0200..95cf9c1e1da1 100644 --- a/Documentation/riscv/index.rst +++ b/Documentation/riscv/index.rst @@ -10,6 +10,7 @@ RISC-V architecture hwprobe patch-acceptance uabi + vector features diff --git a/Documentation/riscv/vector.rst b/Documentation/riscv/vector.rst new file mode 100644 index 000000000000..48f189d79e41 --- /dev/null +++ b/Documentation/riscv/vector.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================= +Vector Extension Support for RISC-V Linux +========================================= + +This document briefly outlines the interface provided to userspace by Linux in +order to support the use of the RISC-V Vector Extension. + +1. prctl() Interface +--------------------- + +Two new prctl() calls are added to allow programs to manage the enablement +status for the use of Vector in userspace. The intended usage guideline for +these interfaces is to give init systems a way to modify the availability of V +for processes running under its domain. Calling thess interfaces is not +recommended in libraries routines because libraries should not override policies +configured from the parant process. Also, users must noted that these interfaces +are not portable to non-Linux, nor non-RISC-V environments, so it is discourage +to use in a portable code. To get the availability of V in an ELF program, +please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the +auxiliary vector. + +* prctl(PR_RISCV_V_SET_CONTROL, unsigned long arg) + + Sets the Vector enablement status of the calling thread, where the control + argument consists of two 2-bit enablement statuses and a bit for inheritance + mode. Other threads of the calling process are unaffected. + + Enablement status is a tri-state value each occupying 2-bit of space in + the control argument: + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_DEFAULT`: Use the system-wide default + enablement status on execve(). The system-wide default setting can be + controlled via sysctl interface (see sysctl section below). + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_ON`: Allow Vector to be run for the + thread. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_OFF`: Disallow Vector. Executing Vector + instructions under such condition will trap and casuse the termination of the thread. + + arg: The control argument is a 5-bit value consisting of 3 parts, and + accessed by 3 masks respectively. + + The 3 masks, PR_RISCV_V_VSTATE_CTRL_CUR_MASK, + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK, and PR_RISCV_V_VSTATE_CTRL_INHERIT + represents bit[1:0], bit[3:2], and bit[4]. bit[1:0] accounts for the + enablement status of current thread, and the setting at bit[3:2] takes place + at next execve(). bit[4] defines the inheritance mode of the setting in + bit[3:2]. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_CUR_MASK`: bit[1:0]: Account for the + Vector enablement status for the calling thread. The calling thread is + not able to turn off Vector once it has been enabled. The prctl() call + fails with EPERM if the value in this mask is PR_RISCV_V_VSTATE_CTRL_OFF + but the current enablement status is not off. Setting + PR_RISCV_V_VSTATE_CTRL_DEFAULT here takes no effect but to set back + the original enablement status. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_NEXT_MASK`: bit[3:2]: Account for the + Vector enablement setting for the calling thread at the next execve() + system call. If PR_RISCV_V_VSTATE_CTRL_DEFAULT is used in this mask, + then the enablement status will be decided by the system-wide + enablement status when execve() happen. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_INHERIT`: bit[4]: the inheritance + mode for the setting at PR_RISCV_V_VSTATE_CTRL_NEXT_MASK. If the bit + is set then the following execve() will not clear the setting in both + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK and PR_RISCV_V_VSTATE_CTRL_INHERIT. + This setting persists across changes in the system-wide default value. + + Return value: + * 0 on success; + * EINVAL: Vector not supported, invalid enablement status for current or + next mask; + * EPERM: Turning off Vector in PR_RISCV_V_VSTATE_CTRL_CUR_MASK if Vector + was enabled for the calling thread. + + On success: + * A valid setting for PR_RISCV_V_VSTATE_CTRL_CUR_MASK takes place + immediately. The enablement status specified in + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK happens at the next execve() call, or + all following execve() calls if PR_RISCV_V_VSTATE_CTRL_INHERIT bit is + set. + * Every successful call overwrites a previous setting for the calling + thread. + +* prctl(PR_RISCV_V_GET_CONTROL) + + Gets the same Vector enablement status for the calling thread. Setting for + next execve() call and the inheritance bit are all OR-ed together. + + Note that ELF programs are able to get the availability of V for itself by + reading :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the + auxiliary vector. + + Return value: + * a nonnegative value on success; + * EINVAL: Vector not supported. + +2. System runtime configuration (sysctl) +----------------------------------------- + +To mitigate the ABI impact of expansion of the signal stack, a +policy mechanism is provided to the administrators, distro maintainers, and +developers to control the default Vector enablement status for userspace +processes in form of sysctl knob: + +* /proc/sys/abi/riscv_v_default_allow + + Writing the text representation of 0 or 1 to this file sets the default + system enablement status for new starting userspace programs. Valid values + are: + + * 0: Do not allow Vector code to be executed as the default for new processes. + * 1: Allow Vector code to be executed as the default for new processes. + + Reading this file returns the current system default enablement status. + + At every execve() call, a new enablement status of the new process is set to + the system default, unless: + + * PR_RISCV_V_VSTATE_CTRL_INHERIT is set for the calling process, and the + setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not + PR_RISCV_V_VSTATE_CTRL_DEFAULT. Or, + + * The setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not + PR_RISCV_V_VSTATE_CTRL_DEFAULT. + + Modifying the system default enablement status does not affect the enablement + status of any existing process of thread that do not make an execve() call. From 7cf6198ce22d92590f9aaa13431001fa97bc0b2b Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:23 +0000 Subject: [PATCH 49/78] selftests: Test RISC-V Vector prctl interface This add a test for prctl interface that controls the use of userspace Vector. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20230605110724.21391-27-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/Makefile | 2 +- .../testing/selftests/riscv/vector/.gitignore | 2 + tools/testing/selftests/riscv/vector/Makefile | 15 ++ .../riscv/vector/vstate_exec_nolibc.c | 111 ++++++++++ .../selftests/riscv/vector/vstate_prctl.c | 189 ++++++++++++++++++ 5 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/riscv/vector/.gitignore create mode 100644 tools/testing/selftests/riscv/vector/Makefile create mode 100644 tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c create mode 100644 tools/testing/selftests/riscv/vector/vstate_prctl.c diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 32a72902d045..9dd629cc86aa 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe +RISCV_SUBTARGETS ?= hwprobe vector else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore new file mode 100644 index 000000000000..4f2b4e8a3b08 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -0,0 +1,2 @@ +vstate_exec_nolibc +vstate_prctl diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile new file mode 100644 index 000000000000..cd6e80bf995d --- /dev/null +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +TEST_GEN_PROGS := vstate_prctl +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc + +include ../../lib.mk + +$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c + $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ + -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c new file mode 100644 index 000000000000..5cbc392944a6 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#define THIS_PROGRAM "./vstate_exec_nolibc" + +int main(int argc, char **argv) +{ + int rc, pid, status, test_inherit = 0; + long ctrl, ctrl_c; + char *exec_argv[2], *exec_envp[2]; + + if (argc > 1) + test_inherit = 1; + + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); + if (ctrl < 0) { + puts("PR_RISCV_V_GET_CONTROL is not supported\n"); + return ctrl; + } + + if (test_inherit) { + pid = fork(); + if (pid == -1) { + puts("fork failed\n"); + exit(-1); + } + + /* child */ + if (!pid) { + exec_argv[0] = THIS_PROGRAM; + exec_argv[1] = NULL; + exec_envp[0] = NULL; + exec_envp[1] = NULL; + /* launch the program again to check inherit */ + rc = execve(THIS_PROGRAM, exec_argv, exec_envp); + if (rc) { + puts("child execve failed\n"); + exit(-1); + } + } + + } else { + pid = fork(); + if (pid == -1) { + puts("fork failed\n"); + exit(-1); + } + + if (!pid) { + rc = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); + if (rc != ctrl) { + puts("child's vstate_ctrl not equal to parent's\n"); + exit(-1); + } + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); + exit(ctrl); + } + } + + rc = waitpid(-1, &status, 0); + + if (WIFEXITED(status) && WEXITSTATUS(status) == -1) { + puts("child exited abnormally\n"); + exit(-1); + } + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) != SIGILL) { + puts("child was terminated by unexpected signal\n"); + exit(-1); + } + + if ((ctrl & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) != PR_RISCV_V_VSTATE_CTRL_OFF) { + puts("child signaled by illegal V access but vstate_ctrl is not off\n"); + exit(-1); + } + + /* child terminated, and its vstate_ctrl is off */ + exit(ctrl); + } + + ctrl_c = WEXITSTATUS(status); + if (test_inherit) { + if (ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT) { + if (!(ctrl_c & PR_RISCV_V_VSTATE_CTRL_INHERIT)) { + puts("parent has inherit bit, but child has not\n"); + exit(-1); + } + } + rc = (ctrl & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2; + if (rc != PR_RISCV_V_VSTATE_CTRL_DEFAULT) { + if (rc != (ctrl_c & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)) { + puts("parent's next setting does not equal to child's\n"); + exit(-1); + } + + if (!(ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT)) { + if ((ctrl_c & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) != + PR_RISCV_V_VSTATE_CTRL_DEFAULT) { + puts("must clear child's next vstate_ctrl if !inherit\n"); + exit(-1); + } + } + } + } + return ctrl; +} diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c new file mode 100644 index 000000000000..b348b475be57 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +/* + * Rather than relying on having a new enough libc to define this, just do it + * ourselves. This way we don't need to be coupled to a new-enough libc to + * contain the call. + */ +long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, unsigned int flags); + +#define NEXT_PROGRAM "./vstate_exec_nolibc" +static int launch_test(int test_inherit) +{ + char *exec_argv[3], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + ksft_test_result_fail("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = NEXT_PROGRAM; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); + if (rc) { + perror("execve"); + ksft_test_result_fail("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + ksft_test_result_fail("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + ksft_test_result_fail("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} + +int test_and_compare_child(long provided, long expected, int inherit) +{ + int rc; + + rc = prctl(PR_RISCV_V_SET_CONTROL, provided); + if (rc != 0) { + ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", + provided, rc); + return -1; + } + rc = launch_test(inherit); + if (rc != expected) { + ksft_test_result_fail("Test failed, check %d != %d\n", rc, + expected); + return -2; + } + return 0; +} + +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 + +int main(void) +{ + struct riscv_hwprobe pair; + long flag, expected; + long rc; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); + if (rc < 0) { + ksft_test_result_fail("hwprobe() failed with %d\n", rc); + return -1; + } + + if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { + ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); + return -2; + } + + if (!(pair.value & RISCV_HWPROBE_IMA_V)) { + rc = prctl(PR_RISCV_V_GET_CONTROL); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + return -3; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + return -4; + } + + ksft_test_result_skip("Vector not supported\n"); + return 0; + } + + flag = PR_RISCV_V_VSTATE_CTRL_ON; + rc = prctl(PR_RISCV_V_SET_CONTROL, flag); + if (rc != 0) { + ksft_test_result_fail("Enabling V for current should always success\n"); + return -5; + } + + flag = PR_RISCV_V_VSTATE_CTRL_OFF; + rc = prctl(PR_RISCV_V_SET_CONTROL, flag); + if (rc != -1 || errno != EPERM) { + ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", + errno); + return -5; + } + + /* Turn on next's vector explicitly and test */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) + return -6; + + /* Turn off next's vector explicitly and test */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) + return -7; + + /* Turn on next's vector explicitly and test inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + if (test_and_compare_child(flag, expected, 0)) + return -8; + + if (test_and_compare_child(flag, expected, 1)) + return -9; + + /* Turn off next's vector explicitly and test inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + if (test_and_compare_child(flag, expected, 0)) + return -10; + + if (test_and_compare_child(flag, expected, 1)) + return -11; + + /* arguments should fail with EINVAL */ + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); + ksft_exit_pass(); + return 0; +} From 1e72695137ef5afd94000b763f7a35899d0a005e Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Mon, 5 Jun 2023 11:07:24 +0000 Subject: [PATCH 50/78] selftests: add .gitignore file for RISC-V hwprobe The executable file "hwprobe" should be ignored by git, adding it to fix that. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20230605110724.21391-28-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/hwprobe/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/riscv/hwprobe/.gitignore diff --git a/tools/testing/selftests/riscv/hwprobe/.gitignore b/tools/testing/selftests/riscv/hwprobe/.gitignore new file mode 100644 index 000000000000..8113dc3bdd03 --- /dev/null +++ b/tools/testing/selftests/riscv/hwprobe/.gitignore @@ -0,0 +1 @@ +hwprobe From c818fea83de4cdf5072c7cf00dd289fc9c6e1c68 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 17 Mar 2023 13:45:12 +0000 Subject: [PATCH 51/78] riscv: say disabling zicbom if no or bad riscv,cbom-block-size found If Zicbom is present but there was no riscv,cbom-blocks-size property found during the cpu feeatures probe, or the cbom-block-size is not valid, then the extension will be disabled. Make the print explicitly say this is disabled to ensure that there is no confusion about what is being done. Signed-off-by: Ben Dooks Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230317134512.254627-1-ben.dooks@codethink.co.uk Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index de2d16300f69..d1e9e879f577 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -75,10 +75,10 @@ static bool riscv_isa_extension_check(int id) switch (id) { case RISCV_ISA_EXT_ZICBOM: if (!riscv_cbom_block_size) { - pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n"); + pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); return false; } else if (!is_power_of_2(riscv_cbom_block_size)) { - pr_err("cbom-block-size present, but is not a power-of-2\n"); + pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); return false; } return true; From de658bcf03339561572e5dad3ec8ecedd1256747 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 9 May 2023 23:26:41 +0800 Subject: [PATCH 52/78] riscv: mm: stub extable related functions/macros for !MMU extable relies on the MMU to work properly, so it's useless to include __ex_table sections and build extable related functions for !MMU case. Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20230509152641.805-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm-extable.h | 6 ++++++ arch/riscv/include/asm/extable.h | 4 ++++ arch/riscv/mm/Makefile | 3 +-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h index 14be0673f5b5..00a96e7a9664 100644 --- a/arch/riscv/include/asm/asm-extable.h +++ b/arch/riscv/include/asm/asm-extable.h @@ -7,6 +7,8 @@ #define EX_TYPE_BPF 2 #define EX_TYPE_UACCESS_ERR_ZERO 3 +#ifdef CONFIG_MMU + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -62,4 +64,8 @@ #endif /* __ASSEMBLY__ */ +#else /* CONFIG_MMU */ + #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) +#endif /* CONFIG_MMU */ + #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h index 512012d193dc..3eb5c1f7bf34 100644 --- a/arch/riscv/include/asm/extable.h +++ b/arch/riscv/include/asm/extable.h @@ -32,7 +32,11 @@ do { \ (b)->data = (tmp).data; \ } while (0) +#ifdef CONFIG_MMU bool fixup_exception(struct pt_regs *regs); +#else +static inline bool fixup_exception(struct pt_regs *regs) { return false; } +#endif #if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index b85e9e82f082..9c454f90fd3d 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,8 +13,7 @@ endif KCOV_INSTRUMENT_init.o := n obj-y += init.o -obj-y += extable.o -obj-$(CONFIG_MMU) += fault.o pageattr.o +obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o obj-y += pgtable.o From 650ea2a1dd964ca0a9c55f68dcb614d359c6b7d7 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Fri, 19 May 2023 14:08:54 +0800 Subject: [PATCH 53/78] riscv: hibernation: Replace jalr with jr before suspend_restore_regs No need to link the x1/ra reg via jalr before suspend_restore_regs So it's better to replace jalr with jr. Signed-off-by: Song Shuai Reviewed-by: JeeHeng Sia Link: https://lore.kernel.org/r/20230519060854.214138-1-suagrfillet@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/hibernate-asm.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index effaf5ca5da0..5c76671c7e15 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L s4, restore_pblist REG_L a1, relocated_restore_code - jalr a1 + jr a1 END(hibernate_restore_image) /* @@ -73,5 +73,5 @@ ENTRY(hibernate_core_restore_code) REG_L s4, HIBERN_PBE_NEXT(s4) bnez s4, .Lcopy - jalr s2 + jr s2 END(hibernate_core_restore_code) From c6399b893043a5bb634de8677362f96684f1c0c8 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Mon, 22 May 2023 10:50:20 +0800 Subject: [PATCH 54/78] riscv: hibernation: Remove duplicate call of suspend_restore_csrs The suspend_restore_csrs is called in both __hibernate_cpu_resume and the `else` of subsequent swsusp_arch_suspend. Removing the first call makes both suspend_{save,restore}_csrs left in swsusp_arch_suspend for clean code. Fixes: c0317210012e ("RISC-V: Add arch functions to support hibernation/suspend-to-disk") Reviewed-by: Conor Dooley Reviewed-by: JeeHeng Sia Signed-off-by: Song Shuai Link: https://lore.kernel.org/r/20230522025020.285042-1-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/hibernate-asm.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index 5c76671c7e15..d698dd7df637 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -28,7 +28,6 @@ ENTRY(__hibernate_cpu_resume) REG_L a0, hibernate_cpu_context - suspend_restore_csrs suspend_restore_regs /* Return zero value. */ From 3b426d4b5b1462b8da31d4e631ac4f3c6270e9e1 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Wed, 7 Jun 2023 16:54:16 +0530 Subject: [PATCH 55/78] RISC-V: ACPI : Fix for usage of pointers in different address space The arch specific __acpi_map_table can be wrapper around either early_memremap or early_ioremap. But early_memremap routine works with normal pointers whereas __acpi_map_table expects pointers in iomem address space. This causes kernel test bot to fail while using the sparse tool. Fix the issue by using early_ioremap and similar fix done for __acpi_unmap_table. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202305201427.I7QhPjNW-lkp@intel.com/ Fixes: a91a9ffbd3a5 ("RISC-V: Add support to build the ACPI core") Signed-off-by: Sunil V L Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230607112417.782085-2-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index df5a45a2eb93..5ee03ebab80e 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -204,7 +204,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) if (!size) return NULL; - return early_memremap(phys, size); + return early_ioremap(phys, size); } void __init __acpi_unmap_table(void __iomem *map, unsigned long size) @@ -212,7 +212,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) if (!map || !size) return; - early_memunmap(map, size); + early_iounmap(map, size); } void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) From ca7473cb8312232d8e03808004c54528e9446b73 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Wed, 7 Jun 2023 16:54:17 +0530 Subject: [PATCH 56/78] RISC-V/perf: Use standard interface to get INTC domain Currently the PMU driver is using DT based lookup to find the INTC node for sscofpmf extension. This will not work for ACPI based systems causing the driver to fail to register the PMU overflow interrupt handler. Hence, change the code to use the standard interface to find the INTC node which works irrespective of DT or ACPI. Signed-off-by: Sunil V L Reviewed-by: Conor Dooley Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20230607112417.782085-3-sunilvl@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_sbi.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 4f3ac296b3e2..0bc491252a44 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -739,7 +739,6 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde { int ret; struct cpu_hw_events __percpu *hw_events = pmu->hw_events; - struct device_node *cpu, *child; struct irq_domain *domain = NULL; if (riscv_isa_extension_available(NULL, SSCOFPMF)) { @@ -756,20 +755,8 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde if (!riscv_pmu_use_irq) return -EOPNOTSUPP; - for_each_of_cpu_node(cpu) { - child = of_get_compatible_child(cpu, "riscv,cpu-intc"); - if (!child) { - pr_err("Failed to find INTC node\n"); - of_node_put(cpu); - return -ENODEV; - } - domain = irq_find_host(child); - of_node_put(child); - if (domain) { - of_node_put(cpu); - break; - } - } + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), + DOMAIN_BUS_ANY); if (!domain) { pr_err("Failed to find INTC IRQ root domain\n"); return -ENODEV; From f20233852ae295fde59c9a28c4a2087d693de3fb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Jun 2023 16:07:06 +0200 Subject: [PATCH 57/78] dt-bindings: riscv: cpus: drop unneeded quotes Cleanup bindings dropping unneeded quotes. Once all these are fixed, checking for this can be enabled in yamllint. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Acked-by: Rob Herring Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230609140706.64623-1-krzysztof.kozlowski@linaro.org Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index db5253a2a74a..8a56473cdd5a 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -61,7 +61,7 @@ properties: hart. These values originate from the RISC-V Privileged Specification document, available from https://riscv.org/specifications/ - $ref: "/schemas/types.yaml#/definitions/string" + $ref: /schemas/types.yaml#/definitions/string enum: - riscv,sv32 - riscv,sv39 @@ -95,7 +95,7 @@ properties: While the isa strings in ISA specification are case insensitive, letters in the riscv,isa string must be all lowercase. - $ref: "/schemas/types.yaml#/definitions/string" + $ref: /schemas/types.yaml#/definitions/string pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here @@ -120,7 +120,7 @@ properties: - interrupt-controller cpu-idle-states: - $ref: '/schemas/types.yaml#/definitions/phandle-array' + $ref: /schemas/types.yaml#/definitions/phandle-array items: maxItems: 1 description: | From c6699baf10647b87b075bf6c65d25b4cd52d4830 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Tue, 9 May 2023 11:25:01 -0700 Subject: [PATCH 58/78] RISC-V: Add Zba, Zbs extension probing Add the Zba address bit manipulation extension and Zbs single bit instructions extension into those the kernel is aware of and maintains in its riscv_isa bitmap. Signed-off-by: Evan Green Reviewed-by: Andrew Jones Reviewed-by: Palmer Dabbelt Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230509182504.2997252-2-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/kernel/cpu.c | 2 ++ arch/riscv/kernel/cpufeature.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e0c40a4c63d5..6b2e8ff4638c 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -46,6 +46,8 @@ #define RISCV_ISA_EXT_ZICBOZ 34 #define RISCV_ISA_EXT_SMAIA 35 #define RISCV_ISA_EXT_SSAIA 36 +#define RISCV_ISA_EXT_ZBA 37 +#define RISCV_ISA_EXT_ZBS 38 #define RISCV_ISA_EXT_MAX 64 #define RISCV_ISA_EXT_NAME_LEN_MAX 32 diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c96aa56cf1c7..bd294364390d 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -184,7 +184,9 @@ static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), + __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), + __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1d6b7e4b829..a1954c83638f 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -236,7 +236,9 @@ void __init riscv_fill_hwcap(void) SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); + SET_ISA_EXT_MAP("zba", RISCV_ISA_EXT_ZBA); SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB); + SET_ISA_EXT_MAP("zbs", RISCV_ISA_EXT_ZBS); SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); From 82e9c66e81c814e20ee2a3aafb60a9012c79fb40 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Tue, 9 May 2023 11:25:02 -0700 Subject: [PATCH 59/78] RISC-V: Track ISA extensions per hart The kernel maintains a mask of ISA extensions ANDed together across all harts. Let's also keep a bitmap of ISA extensions for each CPU. Although the kernel is currently unlikely to enable a feature that exists only on some CPUs, we want the ability to report asymmetric CPU extensions accurately to usermode. Note that riscv_fill_hwcaps() runs before the per_cpu_offsets are built, which is why I've used a [NR_CPUS] array rather than per_cpu() data. Signed-off-by: Evan Green Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230509182504.2997252-3-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 10 ++++++++++ arch/riscv/kernel/cpufeature.c | 18 ++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 808d5403f2ac..23fed53b8815 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -6,6 +6,9 @@ #ifndef _ASM_CPUFEATURE_H #define _ASM_CPUFEATURE_H +#include +#include + /* * These are probed via a device_initcall(), via either the SBI or directly * from the corresponding CSRs. @@ -16,8 +19,15 @@ struct riscv_cpuinfo { unsigned long mimpid; }; +struct riscv_isainfo { + DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX); +}; + DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); DECLARE_PER_CPU(long, misaligned_access_speed); +/* Per-cpu ISA extensions. */ +extern struct riscv_isainfo hart_isa[NR_CPUS]; + #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a1954c83638f..e8b7b4b20bb5 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -26,6 +26,9 @@ unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; +/* Per-cpu ISA extensions. */ +struct riscv_isainfo hart_isa[NR_CPUS]; + /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); @@ -113,14 +116,18 @@ void __init riscv_fill_hwcap(void) bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); for_each_of_cpu_node(node) { + struct riscv_isainfo *isainfo; unsigned long this_hwcap = 0; - DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); const char *temp; + unsigned int cpu_id; rc = riscv_of_processor_hartid(node, &hartid); if (rc < 0) continue; + cpu_id = riscv_hartid_to_cpuid(hartid); + isainfo = &hart_isa[cpu_id]; + if (of_property_read_string(node, "riscv,isa", &isa)) { pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); continue; @@ -137,7 +144,6 @@ void __init riscv_fill_hwcap(void) /* The riscv,isa DT property must start with rv64 or rv32 */ if (temp == isa) continue; - bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); for (; *isa; ++isa) { const char *ext = isa++; const char *ext_end = isa; @@ -215,7 +221,7 @@ void __init riscv_fill_hwcap(void) if ((ext_end - ext == sizeof(name) - 1) && \ !memcmp(ext, name, sizeof(name) - 1) && \ riscv_isa_extension_check(bit)) \ - set_bit(bit, this_isa); \ + set_bit(bit, isainfo->isa); \ } while (false) \ if (unlikely(ext_err)) @@ -225,7 +231,7 @@ void __init riscv_fill_hwcap(void) if (riscv_isa_extension_check(nr)) { this_hwcap |= isa2hwcap[nr]; - set_bit(nr, this_isa); + set_bit(nr, isainfo->isa); } } else { /* sorted alphabetically */ @@ -257,9 +263,9 @@ void __init riscv_fill_hwcap(void) elf_hwcap = this_hwcap; if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) - bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else - bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); } /* We don't support systems with F but without D, so mask those out From c0baf321038d5fa4273c0dc495d78f39848dd8fc Mon Sep 17 00:00:00 2001 From: Evan Green Date: Tue, 9 May 2023 11:25:03 -0700 Subject: [PATCH 60/78] RISC-V: hwprobe: Expose Zba, Zbb, and Zbs Add two new bits to the IMA_EXT_0 key for ZBA, ZBB, and ZBS extensions. These are accurately reported per CPU. Signed-off-by: Evan Green Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230509182504.2997252-4-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/hwprobe.rst | 10 ++++++ arch/riscv/include/uapi/asm/hwprobe.h | 3 ++ arch/riscv/kernel/sys_riscv.c | 48 +++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst index 9f0dd62dcb5d..fb25670ef0e5 100644 --- a/Documentation/riscv/hwprobe.rst +++ b/Documentation/riscv/hwprobe.rst @@ -64,6 +64,16 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined by version 2.2 of the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZBA`: The Zba address generation extension is + supported, as defined in version 1.0 of the Bit-Manipulation ISA + extensions. + + * :c:macro:`RISCV_HWPROBE_EXT_ZBB`: The Zbb extension is supported, as defined + in version 1.0 of the Bit-Manipulation ISA extensions. + + * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined + in version 1.0 of the Bit-Manipulation ISA extensions. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance information about the selected set of processors. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 8d745a4ad8a2..853f8f6d9a42 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -25,6 +25,9 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_EXT_ZBA (1 << 2) +#define RISCV_HWPROBE_EXT_ZBB (1 << 3) +#define RISCV_HWPROBE_EXT_ZBS (1 << 4) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 5db29683ebee..fe655db19ab4 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -121,6 +121,46 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, pair->value = id; } +static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + int cpu; + u64 missing = 0; + + pair->value = 0; + if (has_fpu()) + pair->value |= RISCV_HWPROBE_IMA_FD; + + if (riscv_isa_extension_available(NULL, c)) + pair->value |= RISCV_HWPROBE_IMA_C; + + /* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + */ + for_each_cpu(cpu, cpus) { + struct riscv_isainfo *isainfo = &hart_isa[cpu]; + + if (riscv_isa_extension_available(isainfo->isa, ZBA)) + pair->value |= RISCV_HWPROBE_EXT_ZBA; + else + missing |= RISCV_HWPROBE_EXT_ZBA; + + if (riscv_isa_extension_available(isainfo->isa, ZBB)) + pair->value |= RISCV_HWPROBE_EXT_ZBB; + else + missing |= RISCV_HWPROBE_EXT_ZBB; + + if (riscv_isa_extension_available(isainfo->isa, ZBS)) + pair->value |= RISCV_HWPROBE_EXT_ZBS; + else + missing |= RISCV_HWPROBE_EXT_ZBS; + } + + /* Now turn off reporting features if any CPU is missing it. */ + pair->value &= ~missing; +} + static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -164,13 +204,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_IMA_EXT_0: - pair->value = 0; - if (has_fpu()) - pair->value |= RISCV_HWPROBE_IMA_FD; - - if (riscv_isa_extension_available(NULL, c)) - pair->value |= RISCV_HWPROBE_IMA_C; - + hwprobe_isa_ext0(pair, cpus); break; case RISCV_HWPROBE_KEY_CPUPERF_0: From 7d3332be011e4ed061c1403b30b5e54ebccb4fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 31 May 2023 11:38:17 +0200 Subject: [PATCH 61/78] riscv: mm: Pre-allocate PGD entries for vmalloc/modules area MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V port requires that kernel PGD entries are to be synchronized between MMs. This is done via the vmalloc_fault() function, that simply copies the PGD entries from init_mm to the faulting one. Historically, faulting in PGD entries have been a source for both bugs [1], and poor performance. One way to get rid of vmalloc faults is by pre-allocating the PGD entries. Pre-allocating the entries potientially wastes 64 * 4K (65 on SV39). The pre-allocation function is pulled from Jörg Rödel's x86 work, with the addition of 3-level page tables (PMD allocations). The pmd_alloc() function needs the ptlock cache to be initialized (when split page locks is enabled), so the pre-allocation is done in a RISC-V specific pgtable_cache_init() implementation. Pre-allocate the kernel PGD entries for the vmalloc/modules area, but only for 64b platforms. Link: https://lore.kernel.org/lkml/20200508144043.13893-1-joro@8bytes.org/ # [1] Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230531093817.665799-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 16 ++---------- arch/riscv/mm/init.c | 58 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8685f85a7474..b023fb311e28 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -238,24 +238,12 @@ void handle_page_fault(struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) { + if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) && + unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) { vmalloc_fault(regs, code, addr); return; } -#ifdef CONFIG_64BIT - /* - * Modules in 64bit kernels lie in their own virtual region which is not - * in the vmalloc region, but dealing with page faults in this region - * or the vmalloc region amounts to doing the same thing: checking that - * the mapping exists in init_mm.pgd and updating user page table, so - * just use vmalloc_fault. - */ - if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) { - vmalloc_fault(regs, code, addr); - return; - } -#endif /* Enable interrupts if they were enabled in the parent context. */ if (!regs_irqs_disabled(regs)) local_irq_enable(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 747e5b1ef02d..45ceaff5679e 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1363,3 +1363,61 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node, NULL); } #endif + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +/* + * Pre-allocates page-table pages for a specific area in the kernel + * page-table. Only the level which needs to be synchronized between + * all page-tables is allocated because the synchronization can be + * expensive. + */ +static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end, + const char *area) +{ + unsigned long addr; + const char *lvl; + + for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) { + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + lvl = "p4d"; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + goto failed; + + if (pgtable_l5_enabled) + continue; + + lvl = "pud"; + pud = pud_alloc(&init_mm, p4d, addr); + if (!pud) + goto failed; + + if (pgtable_l4_enabled) + continue; + + lvl = "pmd"; + pmd = pmd_alloc(&init_mm, pud, addr); + if (!pmd) + goto failed; + } + return; + +failed: + /* + * The pages have to be there now or they will be missing in + * process page-tables later. + */ + panic("Failed to pre-allocate %s pages for %s area\n", lvl, area); +} + +void __init pgtable_cache_init(void) +{ + preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc"); + if (IS_ENABLED(CONFIG_MODULES)) + preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules"); +} +#endif From 648321fa0d970c04b4327ac1a053abf43d285931 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:59:42 +0800 Subject: [PATCH 62/78] riscv: mm: try VMA lock-based page fault handling first Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. A simple running the ebizzy benchmark on Lichee Pi 4A shows that PER_VMA_LOCK can improve the ebizzy benchmark by about 32.68%. In theory, the more CPUs, the bigger improvement, but I don't have any HW platform which has more than 4 CPUs. This is the riscv variant of "x86/mm: try VMA lock-based page fault handling first". Signed-off-by: Jisheng Zhang Reviewed-by: Guo Ren Reviewed-by: Suren Baghdasaryan Link: https://lore.kernel.org/r/20230523165942.2630-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/mm/fault.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a3d54cd14fca..a9e8b697fefb 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -44,6 +44,7 @@ config RISCV select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU + select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index b023fb311e28..e52ed89a0cdb 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -274,6 +274,36 @@ void handle_page_fault(struct pt_regs *regs) flags |= FAULT_FLAG_WRITE; else if (cause == EXC_INST_PAGE_FAULT) flags |= FAULT_FLAG_INSTRUCTION; +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(cause, vma))) { + vma_end_read(vma); + goto lock_mmap; + } + + fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, addr); + return; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + retry: mmap_read_lock(mm); vma = find_vma(mm, addr); @@ -343,6 +373,9 @@ good_area: mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (unlikely(fault & VM_FAULT_ERROR)) { tsk->thread.bad_cause = cause; mm_fault_error(regs, addr, fault); From 58b1294dd1d65bb62f08dddbf418f954210c2057 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sun, 23 Apr 2023 09:42:26 +0800 Subject: [PATCH 63/78] riscv: uprobes: Restore thread.bad_cause thread.bad_cause is saved in arch_uprobe_pre_xol(), it should be restored in arch_uprobe_{post,abort}_xol() accordingly, otherwise the save operation is meaningless, this change is similar with x86 and powerpc. Signed-off-by: Tiezhu Yang Acked-by: Oleg Nesterov Reviewed-by: Guo Ren Fixes: 74784081aac8 ("riscv: Add uprobes supported") Link: https://lore.kernel.org/r/1682214146-3756-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/uprobes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index c976a21cd4bd..194f166b2cc4 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -67,6 +67,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) struct uprobe_task *utask = current->utask; WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR); + current->thread.bad_cause = utask->autask.saved_cause; instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size); @@ -102,6 +103,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + current->thread.bad_cause = utask->autask.saved_cause; /* * Task has received a fatal signal, so reset back to probbed * address. From 4681dacadeefa5ca6017e00736adc1d7dc963c6a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 23 Apr 2023 22:32:10 +0000 Subject: [PATCH 64/78] riscv: replace deprecated scall with ecall scall is a deprecated alias for ecall. ecall is used in several places, so there is no assembler compatibility concern. Signed-off-by: Fangrui Song Link: https://lore.kernel.org/r/20230423223210.126948-1-maskray@google.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 2 +- arch/riscv/kernel/vdso/rt_sigreturn.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index e9ae284a55c1..143a2bb3e697 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -348,6 +348,6 @@ SYM_CODE_END(excp_vect_table) #ifndef CONFIG_MMU SYM_CODE_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn - scall + ecall SYM_CODE_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 0573705eac76..10438c7c626a 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -11,6 +11,6 @@ ENTRY(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn - scall + ecall .cfi_endproc ENDPROC(__vdso_rt_sigreturn) From ee95b88d71b9cf7ac1085ebc014f161971e1be9a Mon Sep 17 00:00:00 2001 From: Viacheslav Mitrofanov Date: Fri, 5 May 2023 07:20:57 +0000 Subject: [PATCH 65/78] perf: RISC-V: Limit the number of counters returned from SBI Perf gets the number of supported counters from SBI. If it happens that the number of returned counters more than RISCV_MAX_COUNTERS the code trusts it. It does not lead to an immediate problem but can potentially lead to it. Prevent getting more than RISCV_MAX_COUNTERS from SBI. Signed-off-by: Viacheslav Mitrofanov Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20230505072058.1049732-1-v.v.mitrofanov@yadro.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_sbi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 0bc491252a44..4163ff517471 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -855,6 +855,12 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) goto out_free; } + /* It is possible to get from SBI more than max number of counters */ + if (num_counters > RISCV_MAX_COUNTERS) { + num_counters = RISCV_MAX_COUNTERS; + pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters); + } + /* cache all the information about counters now */ if (pmu_sbi_get_ctrinfo(num_counters, &cmask)) goto out_free; From fed14be476f075a523fd4addfee07cb2f8dc1971 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:25 +0100 Subject: [PATCH 66/78] RISC-V: simplify register width check in ISA string parsing Saving off the `isa` pointer to a temp variable, followed by checking if it has been incremented is a bit of an odd pattern. Perhaps it was done to avoid a funky looking if statement mixed with the ifdeffery. Now that we use IS_ENABLED() here just return from the parser as soon as we detect a mismatch between the string and the currently running kernel. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Sunil V L Link: https://lore.kernel.org/r/20230607-splatter-bacterium-a75bb9f0d0b7@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e3324d661fb9..c8635211fc18 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -126,7 +126,6 @@ void __init riscv_fill_hwcap(void) for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); - const char *temp; if (acpi_disabled) { node = of_cpu_device_node_get(cpu); @@ -149,14 +148,14 @@ void __init riscv_fill_hwcap(void) } } - temp = isa; - if (IS_ENABLED(CONFIG_32BIT) && !strncasecmp(isa, "rv32", 4)) - isa += 4; - else if (IS_ENABLED(CONFIG_64BIT) && !strncasecmp(isa, "rv64", 4)) - isa += 4; - /* The riscv,isa DT property must start with rv64 or rv32 */ - if (temp == isa) + if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32", 4)) continue; + + if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64", 4)) + continue; + + isa += 4; + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); for (; *isa; ++isa) { const char *ext = isa++; From 2ac874343749b76e069cff5fea09c49e0bd365a0 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:26 +0100 Subject: [PATCH 67/78] RISC-V: split early & late of_node to hartid mapping Some back and forth with Drew [1] about riscv_fill_hwcap() resulted in the realisation that it is not very useful to parse the DT & perform validation of riscv,isa every time we would like to get the id for a hart. Although it is no longer called in riscv_fill_hwcap(), riscv_of_processor_hartid() is called in several other places. Notably in setup_smp() it forms part of the logic for filling the mask of possible CPUs. Since a possible CPU must have passed this basic validation of riscv,isa, a repeat validation is not required. Rename riscv_of_processor_id() to riscv_early_of_processor_id(), which will be called from setup_smp() & introduce a new riscv_of_processor_id() which makes use of the pre-populated mask of possible cpus. Link: https://lore.kernel.org/linux-riscv/xvdswl3iyikwvamny7ikrxo2ncuixshtg3f6uucjahpe3xpc5c@ud4cz4fkg5dj/ [1] Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Sunil V L Link: https://lore.kernel.org/r/20230607-glade-pastel-d8cbd9d9f3c6@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 1 + arch/riscv/kernel/cpu.c | 22 +++++++++++++++++++++- arch/riscv/kernel/smpboot.c | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 94a0590c6971..3479f9fca4b0 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -75,6 +75,7 @@ static inline void wait_for_interrupt(void) struct device_node; int riscv_of_processor_hartid(struct device_node *node, unsigned long *hartid); +int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hartid); int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid); extern void riscv_fill_hwcap(void); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 637263f9a7b9..8025de06edb7 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -22,6 +22,26 @@ * isn't an enabled and valid RISC-V hart node. */ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) +{ + int cpu; + + *hart = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hart == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + + cpu = riscv_hartid_to_cpuid(*hart); + if (cpu < 0) + return cpu; + + if (!cpu_possible(cpu)) + return -ENODEV; + + return 0; +} + +int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hart) { const char *isa; @@ -30,7 +50,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) return -ENODEV; } - *hart = (unsigned long) of_get_cpu_hwid(node, 0); + *hart = (unsigned long)of_get_cpu_hwid(node, 0); if (*hart == ~0UL) { pr_warn("Found CPU without hart ID\n"); return -ENODEV; diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 67bc5ef3e8b2..3f42331c8912 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -148,7 +148,7 @@ static void __init of_parse_and_init_cpus(void) cpu_set_ops(0); for_each_of_cpu_node(dn) { - rc = riscv_of_processor_hartid(dn, &hart); + rc = riscv_early_of_processor_hartid(dn, &hart); if (rc < 0) continue; From 069b0d51707721d5ab2001df866b66b82e4c1c35 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:27 +0100 Subject: [PATCH 68/78] RISC-V: validate riscv,isa at boot, not during ISA string parsing Since riscv_fill_hwcap() now only iterates over possible cpus, the basic validation of whether riscv,isa contains "rv" can be moved to riscv_early_of_processor_hartid(). Further, "ima" support is required by the kernel, so reject any CPU not fitting the bill. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Sunil V L Link: https://lore.kernel.org/r/20230607-guts-blurry-67e711acf328@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 8 +++++--- arch/riscv/kernel/cpufeature.c | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 8025de06edb7..dfb4a2a61050 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -65,10 +65,12 @@ int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *har pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart); return -ENODEV; } - if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') { - pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa); + + if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32ima", 7)) + return -ENODEV; + + if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64ima", 7)) return -ENODEV; - } return 0; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c8635211fc18..c3851c8cfa9c 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -148,12 +148,12 @@ void __init riscv_fill_hwcap(void) } } - if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32", 4)) - continue; - - if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64", 4)) - continue; - + /* + * For all possible cpus, we have already validated in + * the boot process that they at least contain "rv" and + * whichever of "32"/"64" this kernel supports, and so this + * section can be skipped. + */ isa += 4; bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); From 6b913e3da87da1be57096c068b4d2e7d4b31f457 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:28 +0100 Subject: [PATCH 69/78] RISC-V: rework comments in ISA string parser I have found these comments to not be at all helpful whenever I look at the parser. Further, the comments in the default case (single letter parser) are not quite right either. Group the comments into a larger one at the start of each case, that attempts to explain things at a higher level. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230607-headpiece-tannery-83ed5cc4856a@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 70 ++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c3851c8cfa9c..7dd4589e79a4 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -164,7 +164,7 @@ void __init riscv_fill_hwcap(void) switch (*ext) { case 's': - /** + /* * Workaround for invalid single-letter 's' & 'u'(QEMU). * No need to set the bit in riscv_isa as 's' & 'u' are * not valid ISA extensions. It works until multi-letter @@ -181,53 +181,101 @@ void __init riscv_fill_hwcap(void) case 'X': case 'z': case 'Z': + /* + * Before attempting to parse the extension itself, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + * + * Next, as the extensions version is currently ignored, we + * eliminate that portion. This is done by parsing backwards from + * the end of the extension, removing any numbers. This may be a + * major or minor number however, so the process is repeated if a + * minor number was found. + * + * ext_end is intended to represent the first character *after* the + * name portion of an extension, but will be decremented to the last + * character itself while eliminating the extensions version number. + * A simple re-increment solves this problem. + */ ext_long = true; - /* Multi-letter extension must be delimited */ for (; *isa && *isa != '_'; ++isa) if (unlikely(!isalnum(*isa))) ext_err = true; - /* Parse backwards */ + ext_end = isa; if (unlikely(ext_err)) break; + if (!isdigit(ext_end[-1])) break; - /* Skip the minor version */ + while (isdigit(*--ext_end)) ; - if (tolower(ext_end[0]) != 'p' - || !isdigit(ext_end[-1])) { - /* Advance it to offset the pre-decrement */ + + if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) { ++ext_end; break; } - /* Skip the major version */ + while (isdigit(*--ext_end)) ; + ++ext_end; break; default: + /* + * Things are a little easier for single-letter extensions, as they + * are parsed forwards. + * + * After checking that our starting position is valid, we need to + * ensure that, when isa was incremented at the start of the loop, + * that it arrived at the start of the next extension. + * + * If we are already on a non-digit, there is nothing to do. Either + * we have a multi-letter extension's _, or the start of an + * extension. + * + * Otherwise we have found the current extension's major version + * number. Parse past it, and a subsequent p/minor version number + * if present. The `p` extension must not appear immediately after + * a number, so there is no fear of missing it. + * + */ if (unlikely(!isalpha(*ext))) { ext_err = true; break; } - /* Find next extension */ + if (!isdigit(*isa)) break; - /* Skip the minor version */ + while (isdigit(*++isa)) ; + if (tolower(*isa) != 'p') break; + if (!isdigit(*++isa)) { --isa; break; } - /* Skip the major version */ + while (isdigit(*++isa)) ; + break; } + + /* + * The parser expects that at the start of an iteration isa points to the + * character before the start of the next extension. This will not be the + * case if we have just parsed a single-letter extension and the next + * extension is not a multi-letter extension prefixed with an "_". It is + * also not the case at the end of the string, where it will point to the + * terminating null character. + */ if (*isa != '_') --isa; From 7816ebc1ddd16b5cc95febb75f778bf88411a365 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:29 +0100 Subject: [PATCH 70/78] RISC-V: remove decrement/increment dance in ISA string parser While expanding on the comments in the ISA string parsing code, I noticed that the conditional decrement of `isa` at the end of the loop was a bit odd. The parsing code expects that at the start of the for loop, `isa` will point to the first character of the next unparsed extension. However, depending on what the next extension is, this may not be true. Unless the next extension is a multi-letter extension preceded by an underscore, `isa` will either point to the string's null-terminator or to the first character of the next extension, once the switch statement has been evaluated. Obviously incrementing `isa` at the end of the loop could cause it to increment past the null terminator or miss a single letter extension, so `isa` is conditionally decremented, just so that the loop can increment it again. It's easier to understand the code if, instead of this decrement + increment dance, we instead use a while loop & rely on the handling of individual extension types to leave `isa` pointing to the first character of the next extension. As already mentioned, this won't be the case where the following extension is multi-letter & preceded by an underscore. To handle that, invert the check and increment rather than decrement. Hopefully this eliminates a "huh?!?" moment the next time somebody tries to understand this code. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Sunil V L Link: https://lore.kernel.org/r/20230607-estate-left-f20faabefb89@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 7dd4589e79a4..84dc44a3e6e5 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -157,7 +157,7 @@ void __init riscv_fill_hwcap(void) isa += 4; bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); - for (; *isa; ++isa) { + while (*isa) { const char *ext = isa++; const char *ext_end = isa; bool ext_long = false, ext_err = false; @@ -270,14 +270,12 @@ void __init riscv_fill_hwcap(void) /* * The parser expects that at the start of an iteration isa points to the - * character before the start of the next extension. This will not be the - * case if we have just parsed a single-letter extension and the next - * extension is not a multi-letter extension prefixed with an "_". It is - * also not the case at the end of the string, where it will point to the - * terminating null character. + * first character of the next extension. As we stop parsing an extension + * on meeting a non-alphanumeric character, an extra increment is needed + * where the succeeding extension is a multi-letter prefixed with an "_". */ - if (*isa != '_') - --isa; + if (*isa == '_') + ++isa; #define SET_ISA_EXT_MAP(name, bit) \ do { \ From 1e5cae98e46d15f4dc7c675e1bd0ed2172ea181c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:30 +0100 Subject: [PATCH 71/78] dt-bindings: riscv: explicitly mention assumption of Zicntr & Zihpm support Similar to commit 41ebfc91f785 ("dt-bindings: riscv: explicitly mention assumption of Zicsr & Zifencei support"), the Zicntr and Zihpm extensions also used to be part of the base ISA but were removed after the bindings were merged. Document the assumption of their presence in the base ISA. Suggested-by: Palmer Dabbelt Signed-off-by: Conor Dooley Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230607-rerun-retinal-5e8ba89e98f1@spud Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index db5253a2a74a..d5208881a1fb 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -89,8 +89,8 @@ properties: Due to revisions of the ISA specification, some deviations have arisen over time. Notably, riscv,isa was defined prior to the creation of the - Zicsr and Zifencei extensions and thus "i" implies - "zicsr_zifencei". + Zicntr, Zicsr, Zifencei and Zihpm extensions and thus "i" + implies "zicntr_zicsr_zifencei_zihpm". While the isa strings in ISA specification are case insensitive, letters in the riscv,isa string must be all From 07edc32779e3dfe164970fc254291258277219c9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 7 Jun 2023 21:28:31 +0100 Subject: [PATCH 72/78] RISC-V: always report presence of extensions formerly part of the base ISA Of these four extensions, two were part of the base ISA when the port was written and are required by the kernel. The other two are implied when `i` is in riscv,isa on DT systems. There's not much that userspace can do with this extra information, but there is no harm in reporting an ISA string that closer resembles the current versions of the specifications either. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230607-nest-collision-5796b6be8be6@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 4 ++++ arch/riscv/kernel/cpu.c | 4 ++++ arch/riscv/kernel/cpufeature.c | 17 +++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e0c40a4c63d5..e0eb9ad06805 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -46,6 +46,10 @@ #define RISCV_ISA_EXT_ZICBOZ 34 #define RISCV_ISA_EXT_SMAIA 35 #define RISCV_ISA_EXT_SSAIA 36 +#define RISCV_ISA_EXT_ZICNTR 37 +#define RISCV_ISA_EXT_ZICSR 38 +#define RISCV_ISA_EXT_ZIFENCEI 39 +#define RISCV_ISA_EXT_ZIHPM 40 #define RISCV_ISA_EXT_MAX 64 #define RISCV_ISA_EXT_NAME_LEN_MAX 32 diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index dfb4a2a61050..6aea6412cf65 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -208,7 +208,11 @@ arch_initcall(riscv_cpuinfo_init); static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), + __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), + __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), + __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 84dc44a3e6e5..d21f7e8a33ef 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -311,6 +311,23 @@ void __init riscv_fill_hwcap(void) #undef SET_ISA_EXT_MAP } + /* + * Linux requires the following extensions, so we may as well + * always set them. + */ + set_bit(RISCV_ISA_EXT_ZICSR, this_isa); + set_bit(RISCV_ISA_EXT_ZIFENCEI, this_isa); + + /* + * These ones were as they were part of the base ISA when the + * port & dt-bindings were upstreamed, and so can be set + * unconditionally where `i` is in riscv,isa on DT systems. + */ + if (acpi_disabled) { + set_bit(RISCV_ISA_EXT_ZICNTR, this_isa); + set_bit(RISCV_ISA_EXT_ZIHPM, this_isa); + } + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't From 163e76cc6ef43b7a5e9b6e245a6d6667c9d9b4a7 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 13 Jun 2023 21:30:16 -0400 Subject: [PATCH 73/78] riscv: stack: Support HAVE_IRQ_EXIT_ON_IRQ_STACK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add independent irq stacks for percpu to prevent kernel stack overflows. It is also compatible with VMAP_STACK by arch_alloc_vmap_stack. Tested-by: Jisheng Zhang Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Clément Léger Link: https://lore.kernel.org/r/20230614013018.2168426-2-guoren@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 7 ++++++ arch/riscv/include/asm/irq_stack.h | 30 ++++++++++++++++++++++++ arch/riscv/include/asm/thread_info.h | 2 ++ arch/riscv/kernel/irq.c | 33 ++++++++++++++++++++++++++ arch/riscv/kernel/traps.c | 35 ++++++++++++++++++++++++++-- 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/include/asm/irq_stack.h diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a3d54cd14fca..a8368fe7be14 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -590,6 +590,13 @@ config FPU If you don't know what to do here, say Y. +config IRQ_STACKS + bool "Independent irq stacks" if EXPERT + default y + select HAVE_IRQ_EXIT_ON_IRQ_STACK + help + Add independent irq stacks for percpu to prevent kernel stack overflows. + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h new file mode 100644 index 000000000000..e4042d297580 --- /dev/null +++ b/arch/riscv/include/asm/irq_stack.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_RISCV_IRQ_STACK_H +#define _ASM_RISCV_IRQ_STACK_H + +#include +#include +#include +#include +#include +#include + +DECLARE_PER_CPU(ulong *, irq_stack_ptr); + +#ifdef CONFIG_VMAP_STACK +/* + * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd + * stacks need to have the same alignment. + */ +static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) +{ + void *p; + + p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, + __builtin_return_address(0)); + return kasan_reset_tag(p); +} +#endif /* CONFIG_VMAP_STACK */ + +#endif /* _ASM_RISCV_IRQ_STACK_H */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 97e6f65ec176..2f32875276b0 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -40,6 +40,8 @@ #define OVERFLOW_STACK_SIZE SZ_4K #define SHADOW_OVERFLOW_STACK_SIZE (1024) +#define IRQ_STACK_SIZE THREAD_SIZE + #ifndef __ASSEMBLY__ extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index eb9a68a539e6..a1dcf8e43b3c 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -28,6 +28,38 @@ struct fwnode_handle *riscv_get_intc_hwnode(void) } EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode); +#ifdef CONFIG_IRQ_STACKS +#include + +DEFINE_PER_CPU(ulong *, irq_stack_ptr); + +#ifdef CONFIG_VMAP_STACK +static void init_irq_stacks(void) +{ + int cpu; + ulong *p; + + for_each_possible_cpu(cpu) { + p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu)); + per_cpu(irq_stack_ptr, cpu) = p; + } +} +#else +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ +DEFINE_PER_CPU_ALIGNED(ulong [IRQ_STACK_SIZE/sizeof(ulong)], irq_stack); + +static void init_irq_stacks(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); +} +#endif /* CONFIG_VMAP_STACK */ +#else +static void init_irq_stacks(void) {} +#endif /* CONFIG_IRQ_STACKS */ + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_stats(p, prec); @@ -36,6 +68,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) void __init init_IRQ(void) { + init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05ffdcd1424e..5158961ea977 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -27,6 +27,7 @@ #include #include #include +#include int show_unhandled_signals = 1; @@ -327,16 +328,46 @@ asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) } #endif -asmlinkage __visible noinstr void do_irq(struct pt_regs *regs) +static void noinstr handle_riscv_irq(struct pt_regs *regs) { struct pt_regs *old_regs; - irqentry_state_t state = irqentry_enter(regs); irq_enter_rcu(); old_regs = set_irq_regs(regs); handle_arch_irq(regs); set_irq_regs(old_regs); irq_exit_rcu(); +} + +asmlinkage void noinstr do_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); +#ifdef CONFIG_IRQ_STACKS + if (on_thread_stack()) { + ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) + + IRQ_STACK_SIZE/sizeof(ulong); + __asm__ __volatile( + "addi sp, sp, -"RISCV_SZPTR "\n" + REG_S" ra, (sp) \n" + "addi sp, sp, -"RISCV_SZPTR "\n" + REG_S" s0, (sp) \n" + "addi s0, sp, 2*"RISCV_SZPTR "\n" + "move sp, %[sp] \n" + "move a0, %[regs] \n" + "call handle_riscv_irq \n" + "addi sp, s0, -2*"RISCV_SZPTR"\n" + REG_L" s0, (sp) \n" + "addi sp, sp, "RISCV_SZPTR "\n" + REG_L" ra, (sp) \n" + "addi sp, sp, "RISCV_SZPTR "\n" + : + : [sp] "r" (sp), [regs] "r" (regs) + : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "memory"); + } else +#endif + handle_riscv_irq(regs); irqentry_exit(regs, state); } From dd69d07a5a6c5a9ada85321ab0695e7978fc6f3e Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 13 Jun 2023 21:30:17 -0400 Subject: [PATCH 74/78] riscv: stack: Support HAVE_SOFTIRQ_ON_OWN_STACK Add the HAVE_SOFTIRQ_ON_OWN_STACK feature for the IRQ_STACKS config, and the irq and softirq use the same irq_stack of percpu. Tested-by: Jisheng Zhang Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20230614013018.2168426-3-guoren@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++-- arch/riscv/kernel/irq.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a8368fe7be14..f515cb101c19 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -591,11 +591,13 @@ config FPU If you don't know what to do here, say Y. config IRQ_STACKS - bool "Independent irq stacks" if EXPERT + bool "Independent irq & softirq stacks" if EXPERT default y select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_SOFTIRQ_ON_OWN_STACK help - Add independent irq stacks for percpu to prevent kernel stack overflows. + Add independent irq & softirq stacks for percpu to prevent kernel stack + overflows. We may save some memory footprint by disabling IRQ_STACKS. endmenu # "Platform type" diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index a1dcf8e43b3c..d0577cc6a081 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include static struct fwnode_handle *(*__get_intc_node)(void); @@ -56,6 +59,38 @@ static void init_irq_stacks(void) per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); } #endif /* CONFIG_VMAP_STACK */ + +#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK +void do_softirq_own_stack(void) +{ +#ifdef CONFIG_IRQ_STACKS + if (on_thread_stack()) { + ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) + + IRQ_STACK_SIZE/sizeof(ulong); + __asm__ __volatile( + "addi sp, sp, -"RISCV_SZPTR "\n" + REG_S" ra, (sp) \n" + "addi sp, sp, -"RISCV_SZPTR "\n" + REG_S" s0, (sp) \n" + "addi s0, sp, 2*"RISCV_SZPTR "\n" + "move sp, %[sp] \n" + "call __do_softirq \n" + "addi sp, s0, -2*"RISCV_SZPTR"\n" + REG_L" s0, (sp) \n" + "addi sp, sp, "RISCV_SZPTR "\n" + REG_L" ra, (sp) \n" + "addi sp, sp, "RISCV_SZPTR "\n" + : + : [sp] "r" (sp) + : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "memory"); + } else +#endif + __do_softirq(); +} +#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */ + #else static void init_irq_stacks(void) {} #endif /* CONFIG_IRQ_STACKS */ From a7555f6b62e7f5b3a3b783cc6d4c4dafcb8527c8 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 13 Jun 2023 21:30:18 -0400 Subject: [PATCH 75/78] riscv: stack: Add config of thread stack size The commit 0cac21b02ba5 ("riscv: use 16KB kernel stack on 64-bit") increases the thread size mandatory, but some scenarios, such as D1 with a small memory footprint, would suffer from that. After independent irq stack support, let's give users a choice to determine their custom stack size. Link: https://lore.kernel.org/linux-riscv/5f6e6c39-b846-4392-b468-02202404de28@www.fastmail.com/ Suggested-by: Arnd Bergmann Tested-by: Jisheng Zhang Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20230614013018.2168426-4-guoren@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 10 ++++++++++ arch/riscv/include/asm/thread_info.h | 12 +----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f515cb101c19..0599bba13654 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -599,6 +599,16 @@ config IRQ_STACKS Add independent irq & softirq stacks for percpu to prevent kernel stack overflows. We may save some memory footprint by disabling IRQ_STACKS. +config THREAD_SIZE_ORDER + int "Kernel stack size (in power-of-two numbers of page size)" if VMAP_STACK && EXPERT + range 0 4 + default 1 if 32BIT && !KASAN + default 3 if 64BIT && KASAN + default 2 + help + Specify the Pages of thread stack size (from 4KB to 64KB), which also + affects irq stack size, which is equal to thread stack size. + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 2f32875276b0..1833beb00489 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -11,18 +11,8 @@ #include #include -#ifdef CONFIG_KASAN -#define KASAN_STACK_ORDER 1 -#else -#define KASAN_STACK_ORDER 0 -#endif - /* thread information allocation */ -#ifdef CONFIG_64BIT -#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) -#else -#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER) -#endif +#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) /* From 3c1b4758a9544cbaf38d052ad66a69618e920ceb Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 15 Jun 2023 23:50:14 +0100 Subject: [PATCH 76/78] dt-bindings: riscv: cpus: add a ref the common cpu schema To permit validation of RISC-V cpu nodes, "additionalProperties: true" needs to be swapped for "unevaluatedProperties: false". To facilitate this in a way that passes dt_binding_check, a reference to the cpu schema is required. Disallow the generic cache-op-block-size property that that drags in, since the RISC-V CBO extensions do not require a common size, and have individual properties. Signed-off-by: Conor Dooley Reviewed-by: Rob Herring Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230615-dubiously-parasail-79d34cefedce@spud Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 3d2934b15e80..e89a10d9c06b 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -23,6 +23,9 @@ description: | two cores, each of which has two hyperthreads, could be described as having four harts. +allOf: + - $ref: /schemas/cpu.yaml# + properties: compatible: oneOf: @@ -98,6 +101,9 @@ properties: $ref: "/schemas/types.yaml#/definitions/string" pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + # RISC-V has multiple properties for cache op block sizes as the sizes + # differ between individual CBO extensions + cache-op-block-size: false # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here timebase-frequency: false From 1ffe6ddc5c64f88b1ec2e250327defb5446a7904 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 15 Jun 2023 23:50:15 +0100 Subject: [PATCH 77/78] dt-bindings: riscv: cpus: switch to unevaluatedProperties: false To permit validation of cpu nodes, swap "additionalProperties: true" out for "unevaluatedProperties: false". Signed-off-by: Conor Dooley Reviewed-by: Rob Herring Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230615-viper-stoic-1ff8efd7d51d@spud Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index e89a10d9c06b..144da86718c1 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -143,7 +143,7 @@ required: - riscv,isa - interrupt-controller -additionalProperties: true +unevaluatedProperties: false examples: - | From 91afbaafd6b1f1846520efd2b158066a25a1a316 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Fri, 9 Jun 2023 15:50:48 +0800 Subject: [PATCH 78/78] riscv: hibernate: remove WARN_ON in save_processor_state During hibernation or restoration, freeze_secondary_cpus checks num_online_cpus via BUG_ON, and the subsequent save_processor_state also does the checking with WARN_ON. In the case of CONFIG_PM_SLEEP_SMP=n, freeze_secondary_cpus is not defined, but the sole possible condition to disable CONFIG_PM_SLEEP_SMP is !SMP where num_online_cpus is always 1. We also don't have to check it in save_processor_state. So remove the unnecessary checking in save_processor_state. Fixes: c0317210012e ("RISC-V: Add arch functions to support hibernation/suspend-to-disk") Signed-off-by: Song Shuai Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230609075049.2651723-4-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/hibernate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c index 264b2dcdd67e..671b686c0158 100644 --- a/arch/riscv/kernel/hibernate.c +++ b/arch/riscv/kernel/hibernate.c @@ -80,7 +80,6 @@ int pfn_is_nosave(unsigned long pfn) void notrace save_processor_state(void) { - WARN_ON(num_online_cpus() != 1); } void notrace restore_processor_state(void)