x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers
We have support for the new SMCA MCA_DE{STAT,ADDR} registers in Linux, so
we've used these registers in place of MCA_{STATUS,ADDR} on SMCA systems.

However, the guidance for current SMCA implementations is to continue using
MCA_{STATUS,ADDR}, and to use MCA_DE{STAT,ADDR} only if a Deferred error was
not found in the former registers. If we logged a Deferred error in MCA_STATUS
then we should also clear MCA_DESTAT. This also means we shouldn't clear
MCA_CONFIG[LogDeferredInMcaStat].

Rework __log_error() to only log an error, and add helpers for the different
error types being logged from the corresponding interrupt handlers.

[ Boris: carve out common functionality into _log_error_bank(). Clean up
  comments, check MCi_STATUS bits before reading MSRs. Streamline flow. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1493147772-2721-1-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Parent: 473e90b2e8
Commit: 37d43acfd7
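Before the diff itself, here is a minimal, self-contained C sketch of the per-bank checking order the commit message describes: check MCA_STATUS first, clear MCA_DESTAT when the Deferred error was already logged from MCA_STATUS, and fall back to MCA_DESTAT otherwise. The struct bank_regs, log_deferred() and the printf-based "logging" are stand-ins invented purely for illustration; the real kernel implementation is the log_error_deferred()/_log_error_bank() pair added by the patch below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MCI_STATUS_VAL      (1ULL << 63)
#define MCI_STATUS_DEFERRED (1ULL << 44)

/* Hypothetical stand-in for one bank's register state; not a kernel API. */
struct bank_regs {
	uint64_t mca_status;	/* MCA_STATUS                 */
	uint64_t mca_destat;	/* MCA_DESTAT (SMCA only)     */
	bool     smca;		/* running on an SMCA system  */
};

/* Model of the Deferred error checking order from the commit message. */
static void log_deferred(struct bank_regs *b)
{
	bool deferred_logged = false;

	/* 1) Check MCA_STATUS first and log whatever valid error is there. */
	if (b->mca_status & MCI_STATUS_VAL) {
		printf("log from MCA_STATUS: 0x%llx\n",
		       (unsigned long long)b->mca_status);
		deferred_logged = b->mca_status & MCI_STATUS_DEFERRED;
		b->mca_status = 0;		/* clear after logging */
	}

	if (!b->smca)
		return;

	/* 2) Deferred error already logged from MCA_STATUS: clear MCA_DESTAT too. */
	if (deferred_logged) {
		b->mca_destat = 0;
		return;
	}

	/* 3) Otherwise check MCA_DESTAT, which only ever holds Deferred errors. */
	if (b->mca_destat & MCI_STATUS_VAL) {
		printf("log from MCA_DESTAT: 0x%llx\n",
		       (unsigned long long)b->mca_destat);
		b->mca_destat = 0;
	}
}

int main(void)
{
	/* An SMCA bank holding a valid Deferred error in both registers. */
	struct bank_regs b = {
		.mca_status = MCI_STATUS_VAL | MCI_STATUS_DEFERRED,
		.mca_destat = MCI_STATUS_VAL | MCI_STATUS_DEFERRED,
		.smca       = true,
	};

	log_deferred(&b);	/* logs once from MCA_STATUS, clears MCA_DESTAT */
	return 0;
}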
@@ -471,20 +471,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 		 */
 		smca_high |= BIT(0);
 
-		/*
-		 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
-		 * registers with the option of additionally logging to
-		 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
-		 *
-		 * This bit is usually set by BIOS to retain the old behavior
-		 * for OSes that don't use the new registers. Linux supports the
-		 * new registers so let's disable that additional logging here.
-		 *
-		 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
-		 * portion of the MSR).
-		 */
-		smca_high &= ~BIT(2);
-
 		/*
 		 * SMCA sets the Deferred Error Interrupt type per bank.
 		 *
@@ -755,37 +741,19 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
 
-static void
-__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
+static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
-	u32 msr_status = msr_ops.status(bank);
-	u32 msr_addr = msr_ops.addr(bank);
 	struct mce m;
-	u64 status;
-
-	WARN_ON_ONCE(deferred_err && threshold_err);
-
-	if (deferred_err && mce_flags.smca) {
-		msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
-		msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
-	}
-
-	rdmsrl(msr_status, status);
-
-	if (!(status & MCI_STATUS_VAL))
-		return;
 
 	mce_setup(&m);
 
 	m.status = status;
+	m.misc = misc;
 	m.bank = bank;
 	m.tsc = rdtsc();
 
-	if (threshold_err)
-		m.misc = misc;
-
 	if (m.status & MCI_STATUS_ADDRV) {
-		rdmsrl(msr_addr, m.addr);
+		m.addr = addr;
 
 		/*
 		 * Extract [55:<lsb>] where lsb is the least significant
@@ -806,8 +774,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 	}
 
 	mce_log(&m);
-
-	wrmsrl(msr_status, 0);
 }
 
 static inline void __smp_deferred_error_interrupt(void)
@@ -832,45 +798,85 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
 	exiting_ack_irq();
 }
 
+/*
+ * Returns true if the logged error is deferred. False, otherwise.
+ */
+static inline bool
+_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
+{
+	u64 status, addr = 0;
+
+	rdmsrl(msr_stat, status);
+	if (!(status & MCI_STATUS_VAL))
+		return false;
+
+	if (status & MCI_STATUS_ADDRV)
+		rdmsrl(msr_addr, addr);
+
+	__log_error(bank, status, addr, misc);
+
+	wrmsrl(status, 0);
+
+	return status & MCI_STATUS_DEFERRED;
+}
+
+/*
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ *    clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ *    log it.
+ */
+static void log_error_deferred(unsigned int bank)
+{
+	bool defrd;
+
+	defrd = _log_error_bank(bank, msr_ops.status(bank),
+					msr_ops.addr(bank), 0);
+
+	if (!mce_flags.smca)
+		return;
+
+	/* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
+	if (defrd) {
+		wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+		return;
+	}
+
+	/*
+	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
+	 * for a valid error.
+	 */
+	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
+			      MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
+}
+
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
 	unsigned int bank;
-	u32 msr_status;
-	u64 status;
 
-	for (bank = 0; bank < mca_cfg.banks; ++bank) {
-		msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
-					      : msr_ops.status(bank);
-
-		rdmsrl(msr_status, status);
-
-		if (!(status & MCI_STATUS_VAL) ||
-		    !(status & MCI_STATUS_DEFERRED))
-			continue;
+	for (bank = 0; bank < mca_cfg.banks; ++bank)
+		log_error_deferred(bank);
+}
 
-		__log_error(bank, true, false, 0);
-		break;
-	}
+static void log_error_thresholding(unsigned int bank, u64 misc)
+{
+	_log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
 }
 
 /*
- * APIC Interrupt Handler
+ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+ * goes off when error_count reaches threshold_limit.
  */
-
-/*
- * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
- * the interrupt goes off when error_count reaches threshold_limit.
- * the handler will simply log mcelog w/ software defined bank number.
- */
-
 static void amd_threshold_interrupt(void)
 {
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block, cpu = smp_processor_id();
 	struct thresh_restart tr;
 
-	/* assume first bank caused it */
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
@@ -893,23 +899,18 @@ static void amd_threshold_interrupt(void)
 			     (high & MASK_LOCKED_HI))
 				continue;
 
-			/*
-			 * Log the machine check that caused the threshold
-			 * event.
-			 */
-			if (high & MASK_OVERFLOW_HI)
-				goto log;
-		}
-	}
-	return;
+			if (!(high & MASK_OVERFLOW_HI))
+				continue;
 
-log:
-	__log_error(bank, false, true, ((u64)high << 32) | low);
+			/* Log the MCE which caused the threshold event. */
+			log_error_thresholding(bank, ((u64)high << 32) | low);
 
-	/* Reset threshold block after logging error. */
-	memset(&tr, 0, sizeof(tr));
-	tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
-	threshold_restart_bank(&tr);
+			/* Reset threshold block after logging error. */
+			memset(&tr, 0, sizeof(tr));
+			tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
+			threshold_restart_bank(&tr);
+		}
+	}
 }
 
 /*