From 4af4206be2bd1933cae20c2b6fb2058dbc887f7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Apr 2014 20:58:54 +0200 Subject: [PATCH 01/11] tracing: Fix syscall_*regfunc() vs copy_process() race syscall_regfunc() and syscall_unregfunc() should set/clear TIF_SYSCALL_TRACEPOINT system-wide, but do_each_thread() can race with copy_process() and miss the new child which was not added to the process/thread lists yet. Change copy_process() to update the child's TIF_SYSCALL_TRACEPOINT under tasklist. Link: http://lkml.kernel.org/p/20140413185854.GB20668@redhat.com Cc: stable@vger.kernel.org # 2.6.33 Fixes: a871bd33a6c0 "tracing: Add syscall tracepoints" Acked-by: Frederic Weisbecker Acked-by: Paul E. McKenney Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/trace/syscall.h | 15 +++++++++++++++ kernel/fork.c | 2 ++ 2 files changed, 17 insertions(+) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index fed853f3d7aa..9674145e2f6a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -32,4 +33,18 @@ struct syscall_metadata { struct ftrace_event_call *exit_event; }; +#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) +static inline void syscall_tracepoint_update(struct task_struct *p) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + else + clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); +} +#else +static inline void syscall_tracepoint_update(struct task_struct *p) +{ +} +#endif + #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index d2799d1fc952..6a13c46cd87d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); + syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); 
cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) From 8063e41d2ffc0b0ce974ea802158be35902072f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Apr 2014 20:59:18 +0200 Subject: [PATCH 02/11] tracing: Change syscall_*regfunc() to check PF_KTHREAD and use for_each_process_thread() 1. Remove _irqsafe from syscall_regfunc/syscall_unregfunc, read_lock(tasklist) doesn't need to disable irqs. 2. Change this code to avoid the deprecated do_each_thread() and use for_each_process_thread() (stolen from the patch from Frederic). 3. Change syscall_regfunc() to check PF_KTHREAD to skip the kernel threads, ->mm != NULL is the common mistake. Note: probably this check should be simply removed, needs another patch. [fweisbec@gmail.com: s/do_each_thread/for_each_process_thread/] Link: http://lkml.kernel.org/p/20140413185918.GC20668@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 33cbd8c203f8..9cf12640de5a 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -492,33 +492,31 @@ static int sys_tracepoint_refcount; void syscall_regfunc(void) { - unsigned long flags; - struct task_struct *g, *t; + struct task_struct *p, *t; if (!sys_tracepoint_refcount) { - read_lock_irqsave(&tasklist_lock, flags); - do_each_thread(g, t) { + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { /* Skip kernel threads. 
*/ - if (t->mm) + if (!(t->flags & PF_KTHREAD)) set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); - } while_each_thread(g, t); - read_unlock_irqrestore(&tasklist_lock, flags); + } + read_unlock(&tasklist_lock); } sys_tracepoint_refcount++; } void syscall_unregfunc(void) { - unsigned long flags; - struct task_struct *g, *t; + struct task_struct *p, *t; sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { - read_lock_irqsave(&tasklist_lock, flags); - do_each_thread(g, t) { + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); - } while_each_thread(g, t); - read_unlock_irqrestore(&tasklist_lock, flags); + } + read_unlock(&tasklist_lock); } } #endif From ea73c79e33c45e1fa0071e216f06fd5682314490 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Apr 2014 20:59:38 +0200 Subject: [PATCH 03/11] tracing: syscall_regfunc() should not skip kernel threads syscall_regfunc() ignores the kernel threads because "it has no effect", see cc3b13c1 "Don't trace kernel thread syscalls" which added this check. However, this means that a user-space task spawned by call_usermodehelper() will run without TIF_SYSCALL_TRACEPOINT if sys_tracepoint_refcount != 0. Remove this check. The unnecessary report from ret_from_fork path mentioned by cc3b13c1 is no longer possible, see commit fb45550d76bb5 "make sure that kernel_thread() callbacks call do_exit() themselves". A kernel_thread() callback can only return and take the int_ret_from_sys_call path after do_execve() succeeds, otherwise the kernel will crash. But in this case it is no longer a kernel thread and thus needs TIF_SYSCALL_TRACEPOINT. 
Link: http://lkml.kernel.org/p/20140413185938.GD20668@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9cf12640de5a..3490407dc7b7 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -497,9 +497,7 @@ void syscall_regfunc(void) if (!sys_tracepoint_refcount) { read_lock(&tasklist_lock); for_each_process_thread(p, t) { - /* Skip kernel threads. */ - if (!(t->flags & PF_KTHREAD)) - set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); + set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); } read_unlock(&tasklist_lock); } From 4d4c9cc839a308be3289a361ccba4447ee140552 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Jun 2014 08:59:16 -0400 Subject: [PATCH 04/11] tracing: Add __field_struct macro for TRACE_EVENT() Currently the __field() macro in TRACE_EVENT is only good for primitive values, such as integers and pointers, but it fails on complex data types such as structures or unions. This is because the __field() macro determines if the variable is signed or not with the test of: (((type)(-1)) < (type)1) Unfortunately, that fails when type is a structure. Since trace events should support structures as fields a new macro is created for such a case called __field_struct() which acts exactly the same as __field() does but it does not do the signed type check and just uses a constant false for that answer. 
Cc: Tony Luck Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 33 ++++++++++++++++++++++ samples/trace_events/trace-events-sample.h | 3 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0fd06fef9fac..26b4f2e13275 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -44,6 +44,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) type item; +#undef __field_struct +#define __field_struct(type, item) type item; + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -122,6 +128,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ if (ret) \ return ret; +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + 0, filter_type); \ + if (ret) \ + return ret; + #undef __field #define __field(type, item) __field_ext(type, item, FILTER_OTHER) +#undef __field_struct +#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ do { \ @@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef __field 
#define __field(type, item) +#undef __field_struct +#define __field_struct(type, item) + #undef __array #define __array(type, item, len) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 6af373236d73..4b0113f73ee9 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -56,7 +56,8 @@ * struct: This defines the way the data will be stored in the ring buffer. * There are currently two types of elements. __field and __array. * a __field is broken up into (type, name). Where type can be any - * type but an array. + * primitive type (integer, long or pointer). __field_struct() can + * be any static complex data value (struct, union, but not an array). * For an array. there are three fields. (type, name, size). The * type of elements in the array, the name of the field and the size * of the array. From 76ac8275f296b49c58f684825543bf4eb85d43d0 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:54:04 -0700 Subject: [PATCH 05/11] trace, RAS: Add basic RAS trace event To avoid confusion and conflict of usage for RAS related trace event, add a unified RAS trace event stub. Start a RAS subsystem menu which will be fleshed out in time, when more features get added to it. 
Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1402475691-30045-2-git-send-email-gong.chen@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/Kconfig | 2 ++ drivers/Makefile | 1 + drivers/edac/Kconfig | 1 + drivers/edac/edac_mc.c | 3 --- drivers/ras/Kconfig | 2 ++ drivers/ras/Makefile | 1 + drivers/ras/ras.c | 12 ++++++++++++ 7 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 drivers/ras/Kconfig create mode 100644 drivers/ras/Makefile create mode 100644 drivers/ras/ras.c diff --git a/drivers/Kconfig b/drivers/Kconfig index 0e87a34b6472..4e6e66c3c8d6 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig" source "drivers/mcb/Kconfig" +source "drivers/ras/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index f98b50d8251d..65c32b1cea3d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/ obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_RAS) += ras/ diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 878f09005fad..d3c0465ba456 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -72,6 +72,7 @@ config EDAC_MCE_INJ config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" + select RAS help Some systems are able to detect and correct errors in main memory. 
EDAC can report statistics on memory error diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2c694b5297cc..9f134823fa75 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -33,9 +33,6 @@ #include #include "edac_core.h" #include "edac_module.h" - -#define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../../include/ras #include /* lock to memory controller's control array */ diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig new file mode 100644 index 000000000000..f9da613052c2 --- /dev/null +++ b/drivers/ras/Kconfig @@ -0,0 +1,2 @@ +config RAS + bool diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile new file mode 100644 index 000000000000..223e806fa5bf --- /dev/null +++ b/drivers/ras/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RAS) += ras.o diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c new file mode 100644 index 000000000000..b0c6ed1d8e77 --- /dev/null +++ b/drivers/ras/ras.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Chen, Gong + */ + +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../../include/ras +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); From 0a2409aad38e97b1db55e6515b990be7b17060f6 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:57:27 -0700 Subject: [PATCH 06/11] trace, AER: Move trace into unified interface AER uses a separate trace interface by now. To make it consistent, move it into unified RAS trace interface. 
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/pci/pcie/aer/Kconfig | 1 + drivers/pci/pcie/aer/aerdrv_errprint.c | 4 +- include/ras/ras_event.h | 64 +++++++++++++++++++++ include/trace/events/ras.h | 77 -------------------------- 4 files changed, 66 insertions(+), 80 deletions(-) delete mode 100644 include/trace/events/ras.h diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig index 50e94e02378a..389440228c1d 100644 --- a/drivers/pci/pcie/aer/Kconfig +++ b/drivers/pci/pcie/aer/Kconfig @@ -5,6 +5,7 @@ config PCIEAER boolean "Root Port Advanced Error Reporting support" depends on PCIEPORTBUS + select RAS default y help This enables PCI Express Root Port Advanced Error Reporting diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 36ed31b52198..35d06e177917 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -22,9 +22,7 @@ #include #include "aerdrv.h" - -#define CREATE_TRACE_POINTS -#include +#include #define AER_AGENT_RECEIVER 0 #define AER_AGENT_REQUESTER 1 diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 21cdb0b7b0fb..acbcbb88eaaa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Hardware Events Report @@ -94,6 +95,69 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). 
+ * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? 
+ __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h deleted file mode 100644 index 1c875ad1ee5f..000000000000 --- a/include/trace/events/ras.h +++ /dev/null @@ -1,77 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ras - -#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_AER_H - -#include -#include - - -/* - * PCIe AER Trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event on a PCIe device. The event report has - * the following structure: - * - * char * dev_name - The name of the slot where the device resides - * ([domain:]bus:device.function). - * u32 status - Either the correctable or uncorrectable register - * indicating what error or errors have been seen - * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED - */ - -#define aer_correctable_errors \ - {BIT(0), "Receiver Error"}, \ - {BIT(6), "Bad TLP"}, \ - {BIT(7), "Bad DLLP"}, \ - {BIT(8), "RELAY_NUM Rollover"}, \ - {BIT(12), "Replay Timer Timeout"}, \ - {BIT(13), "Advisory Non-Fatal"} - -#define aer_uncorrectable_errors \ - {BIT(4), "Data Link Protocol"}, \ - {BIT(12), "Poisoned TLP"}, \ - {BIT(13), "Flow Control Protocol"}, \ - {BIT(14), "Completion Timeout"}, \ - {BIT(15), "Completer Abort"}, \ - {BIT(16), "Unexpected Completion"}, \ - {BIT(17), "Receiver Overflow"}, \ - {BIT(18), "Malformed TLP"}, \ - {BIT(19), "ECRC"}, \ - {BIT(20), "Unsupported Request"} - -TRACE_EVENT(aer_event, - TP_PROTO(const char *dev_name, - const u32 status, - const u8 severity), - - TP_ARGS(dev_name, status, severity), - - TP_STRUCT__entry( - __string( dev_name, dev_name ) - __field( u32, status ) - __field( u8, severity ) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name); - __entry->status = status; - 
__entry->severity = severity; - ), - - TP_printk("%s PCIe Bus Error: severity=%s, %s\n", - __get_str(dev_name), - __entry->severity == AER_CORRECTABLE ? "Corrected" : - __entry->severity == AER_FATAL ? - "Fatal" : "Uncorrected, non-fatal", - __entry->severity == AER_CORRECTABLE ? - __print_flags(__entry->status, "|", aer_correctable_errors) : - __print_flags(__entry->status, "|", aer_uncorrectable_errors)) -); - -#endif /* _TRACE_AER_H */ - -/* This part must be outside protection */ -#include From 3760cd20402d4c131e1994c968ecb055fa0f74bc Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:59:45 -0700 Subject: [PATCH 07/11] CPER: Adjust code flow of some functions Some code can be reorganized into common functions for other uses. Signed-off-by: Chen, Gong Signed-off-by: Tony Luck --- drivers/firmware/efi/cper.c | 161 +++++++++++++++++++++++------------- include/linux/cper.h | 9 ++ 2 files changed, 112 insertions(+), 58 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 1491dd4f08f9..ac33a9fed341 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -34,6 +34,9 @@ #include #define INDENT_SP " " + +static char rcd_decode_str[CPER_REC_LEN]; + /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -50,18 +53,19 @@ u64 cper_next_record_id(void) } EXPORT_SYMBOL_GPL(cper_next_record_id); -static const char *cper_severity_strs[] = { +static const char * const severity_strs[] = { "recoverable", "fatal", "corrected", "info", }; -static const char *cper_severity_str(unsigned int severity) +const char *cper_severity_str(unsigned int severity) { - return severity < ARRAY_SIZE(cper_severity_strs) ? - cper_severity_strs[severity] : "unknown"; + return severity < ARRAY_SIZE(severity_strs) ? 
+ severity_strs[severity] : "unknown"; } +EXPORT_SYMBOL_GPL(cper_severity_str); /* * cper_print_bits - print strings for set bits @@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits, printk("%s\n", buf); } -static const char * const cper_proc_type_strs[] = { +static const char * const proc_type_strs[] = { "IA32/X64", "IA64", }; -static const char * const cper_proc_isa_strs[] = { +static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", }; -static const char * const cper_proc_error_type_strs[] = { +static const char * const proc_error_type_strs[] = { "cache error", "TLB error", "bus error", "micro-architectural error", }; -static const char * const cper_proc_op_strs[] = { +static const char * const proc_op_strs[] = { "unknown or generic", "data read", "data write", "instruction execution", }; -static const char * const cper_proc_flag_strs[] = { +static const char * const proc_flag_strs[] = { "restartable", "precise IP", "overflow", @@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx, { if (proc->validation_bits & CPER_PROC_VALID_TYPE) printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, - proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? - cper_proc_type_strs[proc->proc_type] : "unknown"); + proc->proc_type < ARRAY_SIZE(proc_type_strs) ? + proc_type_strs[proc->proc_type] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ISA) printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, - proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? - cper_proc_isa_strs[proc->proc_isa] : "unknown"); + proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 
+ proc_isa_strs[proc->proc_isa] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); cper_print_bits(pfx, proc->proc_error_type, - cper_proc_error_type_strs, - ARRAY_SIZE(cper_proc_error_type_strs)); + proc_error_type_strs, + ARRAY_SIZE(proc_error_type_strs)); } if (proc->validation_bits & CPER_PROC_VALID_OPERATION) printk("%s""operation: %d, %s\n", pfx, proc->operation, - proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? - cper_proc_op_strs[proc->operation] : "unknown"); + proc->operation < ARRAY_SIZE(proc_op_strs) ? + proc_op_strs[proc->operation] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { printk("%s""flags: 0x%02x\n", pfx, proc->flags); - cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, - ARRAY_SIZE(cper_proc_flag_strs)); + cper_print_bits(pfx, proc->flags, proc_flag_strs, + ARRAY_SIZE(proc_flag_strs)); } if (proc->validation_bits & CPER_PROC_VALID_LEVEL) printk("%s""level: %d\n", pfx, proc->level); @@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } -static const char *cper_mem_err_type_strs[] = { +static const char * const mem_err_type_strs[] = { "unknown", "no error", "single-bit ECC", @@ -196,6 +200,77 @@ static const char *cper_mem_err_type_strs[] = { "physical memory map-out event", }; +const char *cper_mem_err_type_str(unsigned int etype) +{ + return etype < ARRAY_SIZE(mem_err_type_strs) ? 
+ mem_err_type_strs[etype] : "unknown"; +} +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); + +static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + + if (!msg) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; + if (mem->validation_bits & CPER_MEM_VALID_NODE) + n += scnprintf(msg + n, len - n, "node: %d ", mem->node); + if (mem->validation_bits & CPER_MEM_VALID_CARD) + n += scnprintf(msg + n, len - n, "card: %d ", mem->card); + if (mem->validation_bits & CPER_MEM_VALID_MODULE) + n += scnprintf(msg + n, len - n, "module: %d ", mem->module); + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) + n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); + if (mem->validation_bits & CPER_MEM_VALID_BANK) + n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) + n += scnprintf(msg + n, len - n, "device: %d ", mem->device); + if (mem->validation_bits & CPER_MEM_VALID_ROW) + n += scnprintf(msg + n, len - n, "row: %d ", mem->row); + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) + n += scnprintf(msg + n, len - n, "column: %d ", mem->column); + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) + n += scnprintf(msg + n, len - n, "bit_position: %d ", + mem->bit_pos); + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", + mem->requestor_id); + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", + mem->responder_id); + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) + scnprintf(msg + n, len - n, "target_id: 0x%016llx ", + mem->target_id); + + msg[n] = '\0'; + return n; +} + +static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + const char *bank = NULL, *device = NULL; + + if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; + 
dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank && device) + n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); + else + n = snprintf(msg, len, + "DIMM location: not present. DMI handle: 0x%.4x ", + mem->mem_dev_handle); + + msg[n] = '\0'; + return n; +} + static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) @@ -206,48 +281,18 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) printk("%s""physical_address_mask: 0x%016llx\n", pfx, mem->physical_addr_mask); - if (mem->validation_bits & CPER_MEM_VALID_NODE) - pr_debug("node: %d\n", mem->node); - if (mem->validation_bits & CPER_MEM_VALID_CARD) - pr_debug("card: %d\n", mem->card); - if (mem->validation_bits & CPER_MEM_VALID_MODULE) - pr_debug("module: %d\n", mem->module); - if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) - pr_debug("rank: %d\n", mem->rank); - if (mem->validation_bits & CPER_MEM_VALID_BANK) - pr_debug("bank: %d\n", mem->bank); - if (mem->validation_bits & CPER_MEM_VALID_DEVICE) - pr_debug("device: %d\n", mem->device); - if (mem->validation_bits & CPER_MEM_VALID_ROW) - pr_debug("row: %d\n", mem->row); - if (mem->validation_bits & CPER_MEM_VALID_COLUMN) - pr_debug("column: %d\n", mem->column); - if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) - pr_debug("bit_position: %d\n", mem->bit_pos); - if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); - if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - pr_debug("responder_id: 0x%016llx\n", mem->responder_id); - if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) - pr_debug("target_id: 0x%016llx\n", mem->target_id); + if (cper_mem_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = 
mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? - cper_mem_err_type_strs[etype] : "unknown"); - } - if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { - const char *bank = NULL, *device = NULL; - dmi_memdev_name(mem->mem_dev_handle, &bank, &device); - if (bank != NULL && device != NULL) - printk("%s""DIMM location: %s %s", pfx, bank, device); - else - printk("%s""DIMM DMI handle: 0x%.4x", - pfx, mem->mem_dev_handle); + cper_mem_err_type_str(etype)); } + if (cper_dimm_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); } -static const char *cper_pcie_port_type_strs[] = { +static const char * const pcie_port_type_strs[] = { "PCIe end point", "legacy PCI end point", "unknown", @@ -266,8 +311,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, { if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, - pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? - cper_pcie_port_type_strs[pcie->port_type] : "unknown"); + pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? + pcie_port_type_strs[pcie->port_type] : "unknown"); if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) printk("%s""version: %d.%d\n", pfx, pcie->version.major, pcie->version.minor); diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3d89cc..ed088b9c1298 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -35,6 +35,13 @@ */ #define CPER_RECORD_REV 0x0100 +/* + * CPER record length contains the CPER fields which are relevant for further + * handling of a memory error in userspace (we don't carry all the fields + * defined in the UEFI spec because some of them don't make any sense.) + * Currently, a length of 256 should be more than enough. 
+ */ +#define CPER_REC_LEN 256 /* * Severity difinition for error_severity in struct cper_record_header * and section_severity in struct cper_section_descriptor @@ -395,6 +402,8 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); From d963cd95bea93b7db9390a71d1e2cabbb3b2c3ea Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 14:02:20 -0700 Subject: [PATCH 08/11] RAS, debugfs: Add debugfs interface for RAS subsystem Implement a new debugfs interface for RAS subsystem. A file named daemon_active is added there accordingly. This file is used to track if user space daemon accesses perf/trace interface or not. One can track which daemon opens it via "lsof /path/to/debugfs/ras/daemon_active". Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1402475691-30045-5-git-send-email-gong.chen@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/ras/Makefile | 2 +- drivers/ras/debugfs.c | 56 +++++++++++++++++++++++++++++++++++++++++++ drivers/ras/ras.c | 14 +++++++++++ include/linux/ras.h | 14 +++++++++++ 4 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 drivers/ras/debugfs.c create mode 100644 include/linux/ras.h diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 223e806fa5bf..d7f73341ced3 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -1 +1 @@ -obj-$(CONFIG_RAS) += ras.o +obj-$(CONFIG_RAS) += ras.o debugfs.o diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c new file mode 100644 index 000000000000..0322acf67ea5 --- /dev/null +++ b/drivers/ras/debugfs.c @@ -0,0 +1,56 @@ +#include + +static struct dentry *ras_debugfs_dir; + +static atomic_t trace_count = ATOMIC_INIT(0); + +int ras_userspace_consumers(void) +{ + return atomic_read(&trace_count); +} 
+EXPORT_SYMBOL_GPL(ras_userspace_consumers); + +static int trace_show(struct seq_file *m, void *v) +{ + return atomic_read(&trace_count); +} + +static int trace_open(struct inode *inode, struct file *file) +{ + atomic_inc(&trace_count); + return single_open(file, trace_show, NULL); +} + +static int trace_release(struct inode *inode, struct file *file) +{ + atomic_dec(&trace_count); + return single_release(inode, file); +} + +static const struct file_operations trace_fops = { + .open = trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = trace_release, +}; + +int __init ras_add_daemon_trace(void) +{ + struct dentry *fentry; + + if (!ras_debugfs_dir) + return -ENOENT; + + fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir, + NULL, &trace_fops); + if (!fentry) + return -ENODEV; + + return 0; + +} + +void __init ras_debugfs_init(void) +{ + ras_debugfs_dir = debugfs_create_dir("ras", NULL); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index b0c6ed1d8e77..4cac43a1e25c 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -5,8 +5,22 @@ * Chen, Gong */ +#include +#include + #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include +static int __init ras_init(void) +{ + int rc = 0; + + ras_debugfs_init(); + rc = ras_add_daemon_trace(); + + return rc; +} +subsys_initcall(ras_init); + EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/ras.h b/include/linux/ras.h new file mode 100644 index 000000000000..2aceeafd6fe5 --- /dev/null +++ b/include/linux/ras.h @@ -0,0 +1,14 @@ +#ifndef __RAS_H__ +#define __RAS_H__ + +#ifdef CONFIG_DEBUG_FS +int ras_userspace_consumers(void); +void ras_debugfs_init(void); +int ras_add_daemon_trace(void); +#else +static inline int ras_userspace_consumers(void) { return 0; } +static inline void ras_debugfs_init(void) { return; } +static inline int ras_add_daemon_trace(void) { return 0; } +#endif + +#endif From 2dfb7d51a61d7ca91b131c8db612f27d9390f2d5 Mon Sep 17 
00:00:00 2001 From: "Chen, Gong" Date: Tue, 17 Jun 2014 22:33:07 -0400 Subject: [PATCH 09/11] trace, RAS: Add eMCA trace event interface Add trace interface to elaborate all H/W error related information. Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/acpi/Kconfig | 4 ++- drivers/acpi/acpi_extlog.c | 27 ++++++++++++++-- drivers/firmware/efi/cper.c | 45 +++++++++++++++++++++++--- drivers/ras/ras.c | 3 ++ include/linux/cper.h | 23 +++++++++++++ include/ras/ras_event.h | 64 +++++++++++++++++++++++++++++++++++++ 6 files changed, 158 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a34a22841002..206942b8d105 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,6 +370,7 @@ config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC select UEFI_CPER + select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require @@ -384,6 +385,7 @@ config ACPI_EXTLOG Enhanced MCA Logging allows firmware to provide additional error information to system software, synchronous with MCE or CMCI. This - driver adds support for that functionality. + driver adds support for that functionality with corresponding + tracepoint which carries that information to userspace. 
endif # ACPI diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 185334114d71..e61da957f30f 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -16,6 +16,7 @@ #include #include "apei/apei-internal.h" +#include #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ @@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; int bank = mce->bank; int cpu = mce->extcpu; - struct acpi_generic_status *estatus; - int rc; + struct acpi_generic_status *estatus, *tmp; + struct acpi_generic_data *gdata; + const uuid_le *fru_id = &NULL_UUID_LE; + char *fru_text = ""; + uuid_le *sec_type; + static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); if (estatus == NULL) @@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, /* clear record status to enable BIOS to update it again */ estatus->block_status = 0; - rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); + tmp = (struct acpi_generic_status *)elog_buf; + print_extlog_rcd(NULL, tmp, cpu); + + /* log event via trace */ + err_seq++; + gdata = (struct acpi_generic_data *)(tmp + 1); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (uuid_le *)gdata->fru_id; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + sec_type = (uuid_le *)gdata->section_type; + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem = (void *)(gdata + 1); + if (gdata->error_data_length >= sizeof(*mem)) + trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, + (u8)gdata->error_severity); + } return NOTIFY_STOP; } diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ac33a9fed341..437e6fd47311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype) } 
EXPORT_SYMBOL_GPL(cper_mem_err_type_str); -static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; @@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } -static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) +static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; const char *bank = NULL, *device = NULL; @@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, + struct cper_mem_err_compact *cmem) +{ + cmem->validation_bits = mem->validation_bits; + cmem->node = mem->node; + cmem->card = mem->card; + cmem->module = mem->module; + cmem->bank = mem->bank; + cmem->device = mem->device; + cmem->row = mem->row; + cmem->column = mem->column; + cmem->bit_pos = mem->bit_pos; + cmem->requestor_id = mem->requestor_id; + cmem->responder_id = mem->responder_id; + cmem->target_id = mem->target_id; + cmem->rank = mem->rank; + cmem->mem_array_handle = mem->mem_array_handle; + cmem->mem_dev_handle = mem->mem_dev_handle; +} + +const char *cper_mem_err_unpack(struct trace_seq *p, + struct cper_mem_err_compact *cmem) +{ + const char *ret = p->buffer + p->len; + + if (cper_mem_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + if (cper_dimm_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + trace_seq_putc(p, '\0'); + + return ret; +} + static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { + struct cper_mem_err_compact cmem; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -281,14 +317,15 @@ static void cper_print_mem(const 
char *pfx, const struct cper_sec_mem_err *mem) if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) printk("%s""physical_address_mask: 0x%016llx\n", pfx, mem->physical_addr_mask); - if (cper_mem_err_location(mem, rcd_decode_str)) + cper_mem_err_pack(mem, &cmem); + if (cper_mem_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, cper_mem_err_type_str(etype)); } - if (cper_dimm_err_location(mem, rcd_decode_str)) + if (cper_dimm_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); } diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 4cac43a1e25c..b67dd362b7b6 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -23,4 +23,7 @@ static int __init ras_init(void) } subsys_initcall(ras_init); +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/cper.h b/include/linux/cper.h index ed088b9c1298..76abba4b238e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -22,6 +22,7 @@ #define LINUX_CPER_H #include +#include /* CPER record signature and the size */ #define CPER_SIG_RECORD "CPER" @@ -363,6 +364,24 @@ struct cper_sec_mem_err { __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; +struct cper_mem_err_compact { + __u64 validation_bits; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u16 rank; + __u16 mem_array_handle; + __u16 mem_dev_handle; +}; + struct cper_sec_pcie { __u64 validation_bits; __u32 port_type; @@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int); const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const 
strs[], unsigned int strs_size); +void cper_mem_err_pack(const struct cper_sec_mem_err *, + struct cper_mem_err_compact *); +const char *cper_mem_err_unpack(struct trace_seq *, + struct cper_mem_err_compact *); #endif diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index acbcbb88eaaa..47da53c27ffa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -9,6 +9,70 @@ #include #include #include +#include + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. + */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + 
__get_str(fru_text)) +); +#endif /* * Hardware Events Report From d6cae935ec5b7873a8ccd8f0331bef2df729e86a Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 04:34:50 -0400 Subject: [PATCH 10/11] trace, eMCA: Add a knob to adjust where to save event log To avoid saving two copies for one H/W event, add a new file under debugfs to control how to save event log. Once this file is opened, perf/trace will be used and, in the meantime, the kernel will stop printing the event log to the console. On the other hand, if this file is closed, the kernel will print the event log to the console again. Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/acpi/acpi_extlog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index e61da957f30f..a99d4a6156dc 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -154,7 +155,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, estatus->block_status = 0; tmp = (struct acpi_generic_status *)elog_buf; - print_extlog_rcd(NULL, tmp, cpu); + + if (!ras_userspace_consumers()) { + print_extlog_rcd(NULL, tmp, cpu); + goto out; + } /* log event via trace */ err_seq++; @@ -171,6 +176,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, (u8)gdata->error_severity); } +out: return NOTIFY_STOP; } From 7c76bb5f7a3d052339b873374333dd0dcc35ce28 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 04:34:51 -0400 Subject: [PATCH 11/11] RAS, extlog: Adjust init flow Unless the platform has eMCA related capability, there is no need to check whether there is a conflict with the EDAC driver. 
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/acpi/acpi_extlog.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a99d4a6156dc..0ad6f389d922 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -223,19 +223,16 @@ static int __init extlog_init(void) u64 cap; int rc; + rdmsrl(MSR_IA32_MCG_CAP, cap); + + if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + return -ENODEV; + if (get_edac_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } - rc = -ENODEV; - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (!(cap & MCG_ELOG_P)) - return rc; - - if (!extlog_get_l1addr()) - return rc; - rc = -EINVAL; /* get L1 header to fetch necessary information */ l1_hdr_size = sizeof(struct extlog_l1_head);