diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 375ff893fa60..62c483133953 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -583,6 +583,15 @@ config FA_DUMP If unsure, say "y". Only special kernels like petitboot may need to say "N" here. +config PRESERVE_FA_DUMP + bool "Preserve Firmware-assisted dump" + depends on PPC64 && PPC_POWERNV && !FA_DUMP + help + On a kernel with FA_DUMP disabled, this option helps to preserve + crash data from a previously crash'ed kernel. Useful when the next + memory preserving kernel boot would process this crash data. + Petitboot kernel is the typical usecase for this option. + config IRQ_ALL_CPUS bool "Distribute interrupts on all CPUs by default" depends on SMP diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 0f2ce85ee7b9..6c2868d38bd3 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -9,6 +9,7 @@ #ifndef _ASM_POWERPC_FADUMP_INTERNAL_H #define _ASM_POWERPC_FADUMP_INTERNAL_H +#ifndef CONFIG_PRESERVE_FA_DUMP /* * The RMA region will be saved for later dumping when kernel crashes. * RMA is Real Mode Area, the first block of logical memory address owned @@ -146,6 +147,16 @@ void fadump_update_elfcore_header(char *bufp); bool is_fadump_boot_mem_contiguous(void); bool is_fadump_reserved_mem_contiguous(void); +#else /* !CONFIG_PRESERVE_FA_DUMP */ + +/* Firmware-assisted dump configuration details. */ +struct fw_dump { + u64 boot_mem_top; + u64 dump_active; +}; + +#endif /* CONFIG_PRESERVE_FA_DUMP */ + #ifdef CONFIG_PPC_PSERIES extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node); #else diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index fd5002b4c18c..526a6a647312 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -14,9 +14,6 @@ extern int crashing_cpu; extern int is_fadump_memory_area(u64 addr, ulong size); -extern int early_init_dt_scan_fw_dump(unsigned long node, - const char *uname, int depth, void *data); -extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); extern int should_fadump_crash(void); @@ -29,4 +26,10 @@ static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } #endif /* !CONFIG_FA_DUMP */ + +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) +extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, + int depth, void *data); +extern int fadump_reserve_mem(void); +#endif #endif /* _ASM_POWERPC_FADUMP_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 66c54443187d..21ab769e8530 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -79,7 +79,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_FA_DUMP) += fadump.o +ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),) +obj-y += fadump.o +endif ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 6f52a60bc212..645d9d4d9332 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,7 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); +#ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 }; @@ -439,11 +440,6 @@ error_out: return 0; } -unsigned long __init arch_reserved_kernel_pages(void) -{ - return memblock_reserved_size() / PAGE_SIZE; -} - /* Look for fadump= cmdline option. */ static int __init early_fadump_param(char *p) { @@ -1358,6 +1354,39 @@ int __init setup_fadump(void) return 1; } subsys_initcall(setup_fadump); +#else /* !CONFIG_PRESERVE_FA_DUMP */ + +/* Scan the Firmware Assisted dump configuration details. */ +int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, + int depth, void *data) +{ + if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0)) + return 0; + + opal_fadump_dt_scan(&fw_dump, node); + return 1; +} + +/* + * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, + * preserve crash data. The subsequent memory preserving kernel boot + * is likely to process this crash data. + */ +int __init fadump_reserve_mem(void) +{ + if (fw_dump.dump_active) { + /* + * If last boot has crashed then reserve all the memory + * above boot memory to preserve crash data. + */ + pr_info("Preserving crash data for processing in next boot.\n"); + fadump_reserve_crash_area(fw_dump.boot_mem_top); + } else + pr_debug("FADump-aware kernel..\n"); + + return 1; +} +#endif /* CONFIG_PRESERVE_FA_DUMP */ /* Preserve everything above the base address */ static void __init fadump_reserve_crash_area(u64 base) @@ -1382,3 +1411,8 @@ static void __init fadump_reserve_crash_area(u64 base) memblock_reserve(mstart, msize); } } + +unsigned long __init arch_reserved_kernel_pages(void) +{ + return memblock_reserved_size() / PAGE_SIZE; +} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 5828f1c81dc9..6620f37abe73 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -708,7 +708,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL); #endif -#ifdef CONFIG_FA_DUMP +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* scan tree to see if dump is active during last boot */ of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); #endif @@ -735,7 +735,7 @@ void __init early_init_devtree(void *params) if (PHYSICAL_START > MEMORY_START) memblock_reserve(MEMORY_START, 0x8000); reserve_kdump_trampoline(); -#ifdef CONFIG_FA_DUMP +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* * If we fail to reserve memory for firmware-assisted dump then * fallback to kexec based kdump. diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 3e6d6d6d1ec1..2d13e6462339 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -8,6 +8,7 @@ obj-y += ultravisor.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_FA_DUMP) += opal-fadump.o +obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index ad4c18bd8809..c66fa96915ae 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -20,6 +20,67 @@ #include "opal-fadump.h" + +#ifdef CONFIG_PRESERVE_FA_DUMP +/* + * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, + * ensure crash data is preserved in hope that the subsequent memory + * preserving kernel boot is going to process this crash data. + */ +void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) +{ + const struct opal_fadump_mem_struct *opal_fdm_active; + const __be32 *prop; + unsigned long dn; + u64 addr = 0; + s64 ret; + + dn = of_get_flat_dt_subnode_by_name(node, "dump"); + if (dn == -FDT_ERR_NOTFOUND) + return; + + /* + * Check if dump has been initiated on last reboot. + */ + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); + if (!prop) + return; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_debug("Could not get Kernel metadata (%lld)\n", ret); + return; + } + + /* + * Preserve memory only if kernel memory regions are registered + * with f/w for MPIPL. + */ + addr = be64_to_cpu(addr); + pr_debug("Kernel metadata addr: %llx\n", addr); + opal_fdm_active = (void *)addr; + if (opal_fdm_active->registered_regions == 0) + return; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_err("Failed to get boot memory tag (%lld)\n", ret); + return; + } + + /* + * Memory below this address can be used for booting a + * capture kernel or petitboot kernel. Preserve everything + * above this address for processing crashdump. + */ + fadump_conf->boot_mem_top = be64_to_cpu(addr); + pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top); + + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; +} + +#else /* CONFIG_PRESERVE_FA_DUMP */ static const struct opal_fadump_mem_struct *opal_fdm_active; static const struct opal_mpipl_fadump *opal_cpu_metadata; static struct opal_fadump_mem_struct *opal_fdm; @@ -183,6 +244,17 @@ static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf) err = -EPERM; } + /* + * Register boot memory top address with f/w. Should be retrieved + * by a kernel that intends to preserve crash'ed kernel's memory. + */ + ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM, + fadump_conf->boot_memory_size); + if (ret != OPAL_SUCCESS) { + pr_err("Failed to set boot memory tag!\n"); + err = -EPERM; + } + return err; } @@ -649,3 +721,4 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) fadump_conf->dump_active = 1; opal_fadump_get_config(fadump_conf, opal_fdm_active); } +#endif /* !CONFIG_PRESERVE_FA_DUMP */