powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
Add a new kernel config option, CONFIG_PRESERVE_FA_DUMP, that ensures that crash data from a previously crashed kernel is preserved. This helps in cases where FADump is not enabled but the subsequent memory-preserving kernel boot is likely to process this crash data. One typical use case for this config option is the petitboot kernel. As OPAL allows registering an address with it in the first kernel and retrieving it after MPIPL, use it to store the top of boot memory. A kernel that intends to preserve crash data retrieves it and avoids using memory beyond this address. Move the arch_reserved_kernel_pages() function, as it is needed for both FA_DUMP and PRESERVE_FA_DUMP configurations. Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/156821375751.5656.11459483669542541602.stgit@hbathini.in.ibm.com
This commit is contained in:
Родитель
b2a815a554
Коммит
bec53196ad
|
@ -583,6 +583,15 @@ config FA_DUMP
|
||||||
If unsure, say "y". Only special kernels like petitboot may
|
If unsure, say "y". Only special kernels like petitboot may
|
||||||
need to say "N" here.
|
need to say "N" here.
|
||||||
|
|
||||||
|
config PRESERVE_FA_DUMP
|
||||||
|
bool "Preserve Firmware-assisted dump"
|
||||||
|
depends on PPC64 && PPC_POWERNV && !FA_DUMP
|
||||||
|
help
|
||||||
|
On a kernel with FA_DUMP disabled, this option helps to preserve
|
||||||
|
crash data from a previously crashed kernel. Useful when the next
|
||||||
|
memory preserving kernel boot would process this crash data.
|
||||||
|
Petitboot kernel is the typical use case for this option.
|
||||||
|
|
||||||
config IRQ_ALL_CPUS
|
config IRQ_ALL_CPUS
|
||||||
bool "Distribute interrupts on all CPUs by default"
|
bool "Distribute interrupts on all CPUs by default"
|
||||||
depends on SMP
|
depends on SMP
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
|
#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
|
||||||
#define _ASM_POWERPC_FADUMP_INTERNAL_H
|
#define _ASM_POWERPC_FADUMP_INTERNAL_H
|
||||||
|
|
||||||
|
#ifndef CONFIG_PRESERVE_FA_DUMP
|
||||||
/*
|
/*
|
||||||
* The RMA region will be saved for later dumping when kernel crashes.
|
* The RMA region will be saved for later dumping when kernel crashes.
|
||||||
* RMA is Real Mode Area, the first block of logical memory address owned
|
* RMA is Real Mode Area, the first block of logical memory address owned
|
||||||
|
@ -146,6 +147,16 @@ void fadump_update_elfcore_header(char *bufp);
|
||||||
bool is_fadump_boot_mem_contiguous(void);
|
bool is_fadump_boot_mem_contiguous(void);
|
||||||
bool is_fadump_reserved_mem_contiguous(void);
|
bool is_fadump_reserved_mem_contiguous(void);
|
||||||
|
|
||||||
|
#else /* !CONFIG_PRESERVE_FA_DUMP */
|
||||||
|
|
||||||
|
/* Firmware-assisted dump configuration details. */
|
||||||
|
struct fw_dump {
|
||||||
|
u64 boot_mem_top;
|
||||||
|
u64 dump_active;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CONFIG_PRESERVE_FA_DUMP */
|
||||||
|
|
||||||
#ifdef CONFIG_PPC_PSERIES
|
#ifdef CONFIG_PPC_PSERIES
|
||||||
extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
|
extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -14,9 +14,6 @@
|
||||||
extern int crashing_cpu;
|
extern int crashing_cpu;
|
||||||
|
|
||||||
extern int is_fadump_memory_area(u64 addr, ulong size);
|
extern int is_fadump_memory_area(u64 addr, ulong size);
|
||||||
extern int early_init_dt_scan_fw_dump(unsigned long node,
|
|
||||||
const char *uname, int depth, void *data);
|
|
||||||
extern int fadump_reserve_mem(void);
|
|
||||||
extern int setup_fadump(void);
|
extern int setup_fadump(void);
|
||||||
extern int is_fadump_active(void);
|
extern int is_fadump_active(void);
|
||||||
extern int should_fadump_crash(void);
|
extern int should_fadump_crash(void);
|
||||||
|
@ -29,4 +26,10 @@ static inline int should_fadump_crash(void) { return 0; }
|
||||||
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
|
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
|
||||||
static inline void fadump_cleanup(void) { }
|
static inline void fadump_cleanup(void) { }
|
||||||
#endif /* !CONFIG_FA_DUMP */
|
#endif /* !CONFIG_FA_DUMP */
|
||||||
|
|
||||||
|
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
|
||||||
|
extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
|
||||||
|
int depth, void *data);
|
||||||
|
extern int fadump_reserve_mem(void);
|
||||||
|
#endif
|
||||||
#endif /* _ASM_POWERPC_FADUMP_H */
|
#endif /* _ASM_POWERPC_FADUMP_H */
|
||||||
|
|
|
@ -79,7 +79,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
|
||||||
eeh_driver.o eeh_event.o eeh_sysfs.o
|
eeh_driver.o eeh_event.o eeh_sysfs.o
|
||||||
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
|
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
|
||||||
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
|
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
|
||||||
obj-$(CONFIG_FA_DUMP) += fadump.o
|
ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
|
||||||
|
obj-y += fadump.o
|
||||||
|
endif
|
||||||
ifdef CONFIG_PPC32
|
ifdef CONFIG_PPC32
|
||||||
obj-$(CONFIG_E500) += idle_e500.o
|
obj-$(CONFIG_E500) += idle_e500.o
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -36,6 +36,7 @@ static struct fw_dump fw_dump;
|
||||||
|
|
||||||
static void __init fadump_reserve_crash_area(u64 base);
|
static void __init fadump_reserve_crash_area(u64 base);
|
||||||
|
|
||||||
|
#ifndef CONFIG_PRESERVE_FA_DUMP
|
||||||
static DEFINE_MUTEX(fadump_mutex);
|
static DEFINE_MUTEX(fadump_mutex);
|
||||||
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
|
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
|
||||||
struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
|
struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
|
||||||
|
@ -439,11 +440,6 @@ error_out:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long __init arch_reserved_kernel_pages(void)
|
|
||||||
{
|
|
||||||
return memblock_reserved_size() / PAGE_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Look for fadump= cmdline option. */
|
/* Look for fadump= cmdline option. */
|
||||||
static int __init early_fadump_param(char *p)
|
static int __init early_fadump_param(char *p)
|
||||||
{
|
{
|
||||||
|
@ -1358,6 +1354,39 @@ int __init setup_fadump(void)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
subsys_initcall(setup_fadump);
|
subsys_initcall(setup_fadump);
|
||||||
|
#else /* !CONFIG_PRESERVE_FA_DUMP */
|
||||||
|
|
||||||
|
/* Scan the Firmware Assisted dump configuration details. */
|
||||||
|
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
|
||||||
|
int depth, void *data)
|
||||||
|
{
|
||||||
|
if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
opal_fadump_dt_scan(&fw_dump, node);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
|
||||||
|
* preserve crash data. The subsequent memory preserving kernel boot
|
||||||
|
* is likely to process this crash data.
|
||||||
|
*/
|
||||||
|
int __init fadump_reserve_mem(void)
|
||||||
|
{
|
||||||
|
if (fw_dump.dump_active) {
|
||||||
|
/*
|
||||||
|
* If last boot has crashed then reserve all the memory
|
||||||
|
* above boot memory to preserve crash data.
|
||||||
|
*/
|
||||||
|
pr_info("Preserving crash data for processing in next boot.\n");
|
||||||
|
fadump_reserve_crash_area(fw_dump.boot_mem_top);
|
||||||
|
} else
|
||||||
|
pr_debug("FADump-aware kernel..\n");
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_PRESERVE_FA_DUMP */
|
||||||
|
|
||||||
/* Preserve everything above the base address */
|
/* Preserve everything above the base address */
|
||||||
static void __init fadump_reserve_crash_area(u64 base)
|
static void __init fadump_reserve_crash_area(u64 base)
|
||||||
|
@ -1382,3 +1411,8 @@ static void __init fadump_reserve_crash_area(u64 base)
|
||||||
memblock_reserve(mstart, msize);
|
memblock_reserve(mstart, msize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned long __init arch_reserved_kernel_pages(void)
|
||||||
|
{
|
||||||
|
return memblock_reserved_size() / PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
|
@ -708,7 +708,7 @@ void __init early_init_devtree(void *params)
|
||||||
of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
|
of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_FA_DUMP
|
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
|
||||||
/* scan tree to see if dump is active during last boot */
|
/* scan tree to see if dump is active during last boot */
|
||||||
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
|
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
|
||||||
#endif
|
#endif
|
||||||
|
@ -735,7 +735,7 @@ void __init early_init_devtree(void *params)
|
||||||
if (PHYSICAL_START > MEMORY_START)
|
if (PHYSICAL_START > MEMORY_START)
|
||||||
memblock_reserve(MEMORY_START, 0x8000);
|
memblock_reserve(MEMORY_START, 0x8000);
|
||||||
reserve_kdump_trampoline();
|
reserve_kdump_trampoline();
|
||||||
#ifdef CONFIG_FA_DUMP
|
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
|
||||||
/*
|
/*
|
||||||
* If we fail to reserve memory for firmware-assisted dump then
|
* If we fail to reserve memory for firmware-assisted dump then
|
||||||
* fallback to kexec based kdump.
|
* fallback to kexec based kdump.
|
||||||
|
|
|
@ -8,6 +8,7 @@ obj-y += ultravisor.o
|
||||||
|
|
||||||
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
|
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
|
||||||
obj-$(CONFIG_FA_DUMP) += opal-fadump.o
|
obj-$(CONFIG_FA_DUMP) += opal-fadump.o
|
||||||
|
obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
|
||||||
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
|
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
|
||||||
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
|
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
|
||||||
obj-$(CONFIG_EEH) += eeh-powernv.o
|
obj-$(CONFIG_EEH) += eeh-powernv.o
|
||||||
|
|
|
@ -20,6 +20,67 @@
|
||||||
|
|
||||||
#include "opal-fadump.h"
|
#include "opal-fadump.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CONFIG_PRESERVE_FA_DUMP
|
||||||
|
/*
|
||||||
|
* When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
|
||||||
|
* ensure crash data is preserved in hope that the subsequent memory
|
||||||
|
* preserving kernel boot is going to process this crash data.
|
||||||
|
*/
|
||||||
|
void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
|
||||||
|
{
|
||||||
|
const struct opal_fadump_mem_struct *opal_fdm_active;
|
||||||
|
const __be32 *prop;
|
||||||
|
unsigned long dn;
|
||||||
|
u64 addr = 0;
|
||||||
|
s64 ret;
|
||||||
|
|
||||||
|
dn = of_get_flat_dt_subnode_by_name(node, "dump");
|
||||||
|
if (dn == -FDT_ERR_NOTFOUND)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if dump has been initiated on last reboot.
|
||||||
|
*/
|
||||||
|
prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
|
||||||
|
if (!prop)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
|
||||||
|
if ((ret != OPAL_SUCCESS) || !addr) {
|
||||||
|
pr_debug("Could not get Kernel metadata (%lld)\n", ret);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Preserve memory only if kernel memory regions are registered
|
||||||
|
* with f/w for MPIPL.
|
||||||
|
*/
|
||||||
|
addr = be64_to_cpu(addr);
|
||||||
|
pr_debug("Kernel metadata addr: %llx\n", addr);
|
||||||
|
opal_fdm_active = (void *)addr;
|
||||||
|
if (opal_fdm_active->registered_regions == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
|
||||||
|
if ((ret != OPAL_SUCCESS) || !addr) {
|
||||||
|
pr_err("Failed to get boot memory tag (%lld)\n", ret);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Memory below this address can be used for booting a
|
||||||
|
* capture kernel or petitboot kernel. Preserve everything
|
||||||
|
* above this address for processing crashdump.
|
||||||
|
*/
|
||||||
|
fadump_conf->boot_mem_top = be64_to_cpu(addr);
|
||||||
|
pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
|
||||||
|
|
||||||
|
pr_info("Firmware-assisted dump is active.\n");
|
||||||
|
fadump_conf->dump_active = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* CONFIG_PRESERVE_FA_DUMP */
|
||||||
static const struct opal_fadump_mem_struct *opal_fdm_active;
|
static const struct opal_fadump_mem_struct *opal_fdm_active;
|
||||||
static const struct opal_mpipl_fadump *opal_cpu_metadata;
|
static const struct opal_mpipl_fadump *opal_cpu_metadata;
|
||||||
static struct opal_fadump_mem_struct *opal_fdm;
|
static struct opal_fadump_mem_struct *opal_fdm;
|
||||||
|
@ -183,6 +244,17 @@ static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
|
||||||
err = -EPERM;
|
err = -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Register boot memory top address with f/w. Should be retrieved
|
||||||
|
* by a kernel that intends to preserve crash'ed kernel's memory.
|
||||||
|
*/
|
||||||
|
ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
|
||||||
|
fadump_conf->boot_memory_size);
|
||||||
|
if (ret != OPAL_SUCCESS) {
|
||||||
|
pr_err("Failed to set boot memory tag!\n");
|
||||||
|
err = -EPERM;
|
||||||
|
}
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -649,3 +721,4 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
|
||||||
fadump_conf->dump_active = 1;
|
fadump_conf->dump_active = 1;
|
||||||
opal_fadump_get_config(fadump_conf, opal_fdm_active);
|
opal_fadump_get_config(fadump_conf, opal_fdm_active);
|
||||||
}
|
}
|
||||||
|
#endif /* !CONFIG_PRESERVE_FA_DUMP */
|
||||||
|
|
Загрузка…
Ссылка в новой задаче