x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
Scalable MCA defines a number of IP types. An MCA bank on an SMCA system is defined as one of these IP types. A bank's type is uniquely identified by the combination of the HWID and MCATYPE values read from its MCA_IPID register. Add the required tables in order to be able to lookup error descriptions based on a bank's type and the error's extended error code. [ bp: Align comments, simplify a bit. ] Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com> Signed-off-by: Borislav Petkov <bp@suse.de> Link: http://lkml.kernel.org/r/1472741832-1690-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
Родитель
856095b179
Коммит
5896820e0a
|
@ -337,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
|
|||
* Scalable MCA.
|
||||
*/
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
enum amd_ip_types {
|
||||
SMCA_F17H_CORE = 0, /* Core errors */
|
||||
SMCA_DF, /* Data Fabric */
|
||||
|
||||
/* These may be used by multiple smca_hwid_mcatypes */
|
||||
enum smca_bank_types {
|
||||
SMCA_LS = 0, /* Load Store */
|
||||
SMCA_IF, /* Instruction Fetch */
|
||||
SMCA_L2_CACHE, /* L2 Cache */
|
||||
SMCA_DE, /* Decoder Unit */
|
||||
SMCA_EX, /* Execution Unit */
|
||||
SMCA_FP, /* Floating Point */
|
||||
SMCA_L3_CACHE, /* L3 Cache */
|
||||
SMCA_CS, /* Coherent Slave */
|
||||
SMCA_PIE, /* Power, Interrupts, etc. */
|
||||
SMCA_UMC, /* Unified Memory Controller */
|
||||
SMCA_PB, /* Parameter Block */
|
||||
SMCA_PSP, /* Platform Security Processor */
|
||||
SMCA_SMU, /* System Management Unit */
|
||||
N_AMD_IP_TYPES
|
||||
N_SMCA_BANK_TYPES
|
||||
};
|
||||
|
||||
struct amd_hwid {
|
||||
const char *name;
|
||||
unsigned int hwid;
|
||||
struct smca_bank_name {
|
||||
const char *name; /* Short name for sysfs */
|
||||
const char *long_name; /* Long name for pretty-printing */
|
||||
};
|
||||
|
||||
extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
|
||||
extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
|
||||
|
||||
enum amd_core_mca_blocks {
|
||||
SMCA_LS = 0, /* Load Store */
|
||||
SMCA_IF, /* Instruction Fetch */
|
||||
SMCA_L2_CACHE, /* L2 cache */
|
||||
SMCA_DE, /* Decoder unit */
|
||||
RES, /* Reserved */
|
||||
SMCA_EX, /* Execution unit */
|
||||
SMCA_FP, /* Floating Point */
|
||||
SMCA_L3_CACHE, /* L3 cache */
|
||||
N_CORE_MCA_BLOCKS
|
||||
#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
|
||||
|
||||
struct smca_hwid_mcatype {
|
||||
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
|
||||
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
|
||||
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
|
||||
};
|
||||
|
||||
extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
|
||||
|
||||
enum amd_df_mca_blocks {
|
||||
SMCA_CS = 0, /* Coherent Slave */
|
||||
SMCA_PIE, /* Power management, Interrupts, etc */
|
||||
N_DF_BLOCKS
|
||||
struct smca_bank_info {
|
||||
struct smca_hwid_mcatype *type;
|
||||
u32 type_instance;
|
||||
};
|
||||
|
||||
extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
|
||||
extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_MCE_H */
|
||||
|
|
|
@ -63,34 +63,55 @@ static const char * const th_names[] = {
|
|||
"execution_unit",
|
||||
};
|
||||
|
||||
/* Define HWID to IP type mappings for Scalable MCA */
|
||||
struct amd_hwid amd_hwids[] = {
|
||||
[SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
|
||||
[SMCA_DF] = { "data_fabric", 0x2E },
|
||||
[SMCA_UMC] = { "umc", 0x96 },
|
||||
[SMCA_PB] = { "param_block", 0x5 },
|
||||
[SMCA_PSP] = { "psp", 0xFF },
|
||||
[SMCA_SMU] = { "smu", 0x1 },
|
||||
struct smca_bank_name smca_bank_names[] = {
|
||||
[SMCA_LS] = { "load_store", "Load Store Unit" },
|
||||
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
|
||||
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
|
||||
[SMCA_DE] = { "decode_unit", "Decode Unit" },
|
||||
[SMCA_EX] = { "execution_unit", "Execution Unit" },
|
||||
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
|
||||
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
|
||||
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
|
||||
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
|
||||
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
|
||||
[SMCA_PB] = { "param_block", "Parameter Block" },
|
||||
[SMCA_PSP] = { "psp", "Platform Security Processor" },
|
||||
[SMCA_SMU] = { "smu", "System Management Unit" },
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_hwids);
|
||||
EXPORT_SYMBOL_GPL(smca_bank_names);
|
||||
|
||||
const char * const amd_core_mcablock_names[] = {
|
||||
[SMCA_LS] = "load_store",
|
||||
[SMCA_IF] = "insn_fetch",
|
||||
[SMCA_L2_CACHE] = "l2_cache",
|
||||
[SMCA_DE] = "decode_unit",
|
||||
[RES] = "",
|
||||
[SMCA_EX] = "execution_unit",
|
||||
[SMCA_FP] = "floating_point",
|
||||
[SMCA_L3_CACHE] = "l3_cache",
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
|
||||
static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
|
||||
/* { bank_type, hwid_mcatype, xec_bitmap } */
|
||||
|
||||
const char * const amd_df_mcablock_names[] = {
|
||||
[SMCA_CS] = "coherent_slave",
|
||||
[SMCA_PIE] = "pie",
|
||||
/* ZN Core (HWID=0xB0) MCA types */
|
||||
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
|
||||
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
|
||||
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
|
||||
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
|
||||
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
|
||||
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
|
||||
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
|
||||
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
|
||||
|
||||
/* Data Fabric MCA types */
|
||||
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
|
||||
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
|
||||
|
||||
/* Unified Memory Controller MCA type */
|
||||
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
|
||||
|
||||
/* Parameter Block MCA type */
|
||||
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
|
||||
|
||||
/* Platform Security Processor MCA type */
|
||||
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
|
||||
|
||||
/* System Management Unit MCA type */
|
||||
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
|
||||
|
||||
struct smca_bank_info smca_banks[MAX_NR_BANKS];
|
||||
EXPORT_SYMBOL_GPL(smca_banks);
|
||||
|
||||
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
|
||||
|
@ -108,6 +129,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
|
|||
* CPU Initialization
|
||||
*/
|
||||
|
||||
static void get_smca_bank_info(unsigned int bank)
|
||||
{
|
||||
unsigned int i, hwid_mcatype, cpu = smp_processor_id();
|
||||
struct smca_hwid_mcatype *type;
|
||||
u32 high, instanceId;
|
||||
u16 hwid, mcatype;
|
||||
|
||||
/* Collect bank_info using CPU 0 for now. */
|
||||
if (cpu)
|
||||
return;
|
||||
|
||||
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
|
||||
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
|
||||
return;
|
||||
}
|
||||
|
||||
hwid = high & MCI_IPID_HWID;
|
||||
mcatype = (high & MCI_IPID_MCATYPE) >> 16;
|
||||
hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
|
||||
type = &smca_hwid_mcatypes[i];
|
||||
if (hwid_mcatype == type->hwid_mcatype) {
|
||||
smca_banks[bank].type = type;
|
||||
smca_banks[bank].type_instance = instanceId;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct thresh_restart {
|
||||
struct threshold_block *b;
|
||||
int reset;
|
||||
|
@ -425,6 +476,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
|||
int offset = -1;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (mce_flags.smca)
|
||||
get_smca_bank_info(bank);
|
||||
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
address = get_block_address(cpu, address, low, high, bank, block);
|
||||
if (!address)
|
||||
|
|
|
@ -283,6 +283,27 @@ static const char * const smca_smu_mce_desc[] = {
|
|||
"SMU RAM ECC or parity error",
|
||||
};
|
||||
|
||||
struct smca_mce_desc {
|
||||
const char * const *descs;
|
||||
unsigned int num_descs;
|
||||
};
|
||||
|
||||
static struct smca_mce_desc smca_mce_descs[] = {
|
||||
[SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
|
||||
[SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
|
||||
[SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
|
||||
[SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
|
||||
[SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
|
||||
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
|
||||
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
|
||||
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
|
||||
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
|
||||
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
|
||||
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
|
||||
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
|
||||
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
|
||||
};
|
||||
|
||||
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
||||
{
|
||||
bool ret = false;
|
||||
|
@ -827,175 +848,32 @@ static void decode_mc6_mce(struct mce *m)
|
|||
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
|
||||
}
|
||||
|
||||
static void decode_f17h_core_errors(const char *ip_name, u8 xec,
|
||||
unsigned int mca_type)
|
||||
{
|
||||
const char * const *error_desc_array;
|
||||
size_t len;
|
||||
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
|
||||
switch (mca_type) {
|
||||
case SMCA_LS:
|
||||
error_desc_array = smca_ls_mce_desc;
|
||||
len = ARRAY_SIZE(smca_ls_mce_desc) - 1;
|
||||
|
||||
if (xec == 0x4) {
|
||||
pr_cont("Unrecognized LS MCA error code.\n");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case SMCA_IF:
|
||||
error_desc_array = smca_if_mce_desc;
|
||||
len = ARRAY_SIZE(smca_if_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_L2_CACHE:
|
||||
error_desc_array = smca_l2_mce_desc;
|
||||
len = ARRAY_SIZE(smca_l2_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_DE:
|
||||
error_desc_array = smca_de_mce_desc;
|
||||
len = ARRAY_SIZE(smca_de_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_EX:
|
||||
error_desc_array = smca_ex_mce_desc;
|
||||
len = ARRAY_SIZE(smca_ex_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_FP:
|
||||
error_desc_array = smca_fp_mce_desc;
|
||||
len = ARRAY_SIZE(smca_fp_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_L3_CACHE:
|
||||
error_desc_array = smca_l3_mce_desc;
|
||||
len = ARRAY_SIZE(smca_l3_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_cont("Corrupted MCA core error info.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n",
|
||||
amd_core_mcablock_names[mca_type]);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
static void decode_df_errors(u8 xec, unsigned int mca_type)
|
||||
{
|
||||
const char * const *error_desc_array;
|
||||
size_t len;
|
||||
|
||||
pr_emerg(HW_ERR "Data Fabric Error: ");
|
||||
|
||||
switch (mca_type) {
|
||||
case SMCA_CS:
|
||||
error_desc_array = smca_cs_mce_desc;
|
||||
len = ARRAY_SIZE(smca_cs_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PIE:
|
||||
error_desc_array = smca_pie_mce_desc;
|
||||
len = ARRAY_SIZE(smca_pie_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_cont("Corrupted MCA Data Fabric info.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n",
|
||||
amd_df_mcablock_names[mca_type]);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
/* Decode errors according to Scalable MCA specification */
|
||||
static void decode_smca_errors(struct mce *m)
|
||||
{
|
||||
u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
|
||||
unsigned int hwid, mca_type, i;
|
||||
u8 xec = XEC(m->status, xec_mask);
|
||||
const char * const *error_desc_array;
|
||||
struct smca_hwid_mcatype *type;
|
||||
unsigned int bank_type;
|
||||
const char *ip_name;
|
||||
u32 low, high;
|
||||
size_t len;
|
||||
u8 xec = XEC(m->status, xec_mask);
|
||||
|
||||
if (rdmsr_safe(addr, &low, &high)) {
|
||||
pr_emerg(HW_ERR "Invalid IP block specified.\n");
|
||||
if (m->bank >= ARRAY_SIZE(smca_banks))
|
||||
return;
|
||||
}
|
||||
|
||||
hwid = high & MCI_IPID_HWID;
|
||||
mca_type = (high & MCI_IPID_MCATYPE) >> 16;
|
||||
|
||||
pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
|
||||
|
||||
/*
|
||||
* Based on hwid and mca_type values, decode errors from respective IPs.
|
||||
* Note: mca_type values make sense only in the context of an hwid.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
|
||||
if (amd_hwids[i].hwid == hwid)
|
||||
break;
|
||||
|
||||
switch (i) {
|
||||
case SMCA_F17H_CORE:
|
||||
ip_name = (mca_type == SMCA_L3_CACHE) ?
|
||||
"L3 Cache" : "F17h Core";
|
||||
return decode_f17h_core_errors(ip_name, xec, mca_type);
|
||||
break;
|
||||
|
||||
case SMCA_DF:
|
||||
return decode_df_errors(xec, mca_type);
|
||||
break;
|
||||
|
||||
case SMCA_UMC:
|
||||
error_desc_array = smca_umc_mce_desc;
|
||||
len = ARRAY_SIZE(smca_umc_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PB:
|
||||
error_desc_array = smca_pb_mce_desc;
|
||||
len = ARRAY_SIZE(smca_pb_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_PSP:
|
||||
error_desc_array = smca_psp_mce_desc;
|
||||
len = ARRAY_SIZE(smca_psp_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
case SMCA_SMU:
|
||||
error_desc_array = smca_smu_mce_desc;
|
||||
len = ARRAY_SIZE(smca_smu_mce_desc) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
|
||||
type = smca_banks[m->bank].type;
|
||||
if (!type)
|
||||
return;
|
||||
}
|
||||
|
||||
ip_name = amd_hwids[i].name;
|
||||
bank_type = type->bank_type;
|
||||
ip_name = smca_bank_names[bank_type].long_name;
|
||||
|
||||
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
|
||||
|
||||
/* Only print the decode of valid error codes */
|
||||
if (xec < smca_mce_descs[bank_type].num_descs &&
|
||||
(type->xec_bitmap & BIT_ULL(xec))) {
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
|
||||
if (xec > len) {
|
||||
pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
|
||||
return;
|
||||
pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
|
||||
}
|
||||
|
||||
pr_cont("%s.\n", error_desc_array[xec]);
|
||||
}
|
||||
|
||||
static inline void amd_decode_err_code(u16 ec)
|
||||
|
|
Загрузка…
Ссылка в новой задаче