- Add the required information to the faked APEI-reported mem error so

that the kernel properly attempts to offline the corresponding page, as
 it does for kernel-detected correctable errors.
 
 - Fix a typo in AMD's error descriptions.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmDZg1cACgkQEsHwGGHe
 VUpW8BAAlnwit5Vg4UVocY7mwTi0GvP36Fz2u81kppMROpWgQhhmX35ZxoxgQoSC
 0ojKnOJTgGpOdKknmK/vom4ysxNRZxjz0zat9n+cqcfqVwP14KzhjaX1FPXnEQfE
 mPkn3v8fsML87glPTzmpELYSOZTpu6OYdiFZAzKL8Gp8aytyh4FamTV2eTxn5ClG
 +dejrN0NFiSALarliNttPnpfC5JvQ0KUJFxapYaMd27ssqL/2XMvJmBSpGC+OaZg
 lvvv7XuRrIPRZ7lU3Zipz7Rv5r8tTfPUMr33DcUuAZxpXW3zRpds153HktTYSqsv
 pZHTTLZ73GbAFVlkjqP6wcAtW2ygKW3lxsPuBSR8aIj8yU7rrrkG4wm2XsvCtrXP
 4KrTZLgqGHFQaXbp1BzJzrnLyb6dxZXkEaAnX/7ZygDz+L5aMlG/7XEk4c/R9YbS
 bg6NO/Dh1E547cf+bN6/yYNwPjNaO1lGOMU9N2IwjCiHFERzTsFGyFNjqMSGa7Ul
 34FZAB11aklqbj+0amu5IeMd8vM3unqTGnYEQCcyG09mdsa9/bjEvEgCirq5FXf3
 szjUmGpdtAsxCRZ7SzhsDu1IMT0F2D8hwgJbFSLXmtpiq5WB/EHaYbiqg8F6V36J
 bENGE3rLj3HkgWHsLpgEMX2OXh7Pzo3UqwwbtOuYEiwwhvh7CZk=
 =1Azq
 -----END PGP SIGNATURE-----

Merge tag 'ras_core_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

 - Add the required information to the faked APEI-reported mem error so
   that the kernel properly attempts to offline the corresponding page,
   as it does for kernel-detected correctable errors.

 - Fix a typo in AMD's error descriptions.

* tag 'ras_core_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Fix typo "FIfo" -> "Fifo"
  x86/mce: Include a MCi_MISC value in faked mce logs
  x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types
This commit is contained in:
Linus Torvalds 2021-06-28 11:19:40 -07:00
Родитель 2a5c61843e 429b2ba708
Коммит f565b20734
4 изменённых файлов: 115 добавлений и 26 удалений

Просмотреть файл

@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected,
/* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
SMCA_LS_V2, /* Load Store */
SMCA_LS_V2,
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
@ -314,17 +314,22 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
SMCA_CS_V2, /* Coherent Slave */
SMCA_CS_V2,
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
SMCA_UMC_V2,
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
SMCA_PSP_V2, /* Platform Security Processor */
SMCA_PSP_V2,
SMCA_SMU, /* System Management Unit */
SMCA_SMU_V2, /* System Management Unit */
SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
SMCA_PCIE_V2,
SMCA_XGMI_PCS, /* xGMI PCS Unit */
SMCA_XGMI_PHY, /* xGMI PHY Unit */
SMCA_WAFL_PHY, /* WAFL PHY Unit */
N_SMCA_BANK_TYPES
};

Просмотреть файл

@ -77,27 +77,29 @@ struct smca_bank_name {
};
static struct smca_bank_name smca_names[] = {
[SMCA_LS] = { "load_store", "Load Store Unit" },
[SMCA_LS_V2] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
[SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
[SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
[SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
[SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
[SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
/* UMC v2 is separate because both of them can exist in a single system. */
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
[SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
[SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
[SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
};
static const char *smca_get_name(enum smca_bank_types t)
@ -155,6 +157,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
{ SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
@ -175,6 +178,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
/* xGMI PCS MCA type */
{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
/* xGMI PHY MCA type */
{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
/* WAFL PHY MCA type */
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];

Просмотреть файл

@ -36,7 +36,8 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
mce_setup(&m);
m.bank = -1;
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT;
if (severity >= GHES_SEV_RECOVERABLE)
m.status |= MCI_STATUS_UC;

Просмотреть файл

@ -323,6 +323,21 @@ static const char * const smca_umc_mce_desc[] = {
"AES SRAM ECC error",
};
static const char * const smca_umc2_mce_desc[] = {
"DRAM ECC error",
"Data poison error",
"SDP parity error",
"Reserved",
"Address/Command parity error",
"Write data parity error",
"DCQ SRAM ECC error",
"Reserved",
"Read data parity error",
"Rdb SRAM ECC error",
"RdRsp SRAM ECC error",
"LM32 MP errors",
};
static const char * const smca_pb_mce_desc[] = {
"An ECC error in the Parameter Block RAM array",
};
@ -400,6 +415,56 @@ static const char * const smca_pcie_mce_desc[] = {
"CCIX Non-okay write response with data error",
};
static const char * const smca_pcie2_mce_desc[] = {
"SDP Parity Error logging",
};
static const char * const smca_xgmipcs_mce_desc[] = {
"Data Loss Error",
"Training Error",
"Flow Control Acknowledge Error",
"Rx Fifo Underflow Error",
"Rx Fifo Overflow Error",
"CRC Error",
"BER Exceeded Error",
"Tx Vcid Data Error",
"Replay Buffer Parity Error",
"Data Parity Error",
"Replay Fifo Overflow Error",
"Replay Fifo Underflow Error",
"Elastic Fifo Overflow Error",
"Deskew Error",
"Flow Control CRC Error",
"Data Startup Limit Error",
"FC Init Timeout Error",
"Recovery Timeout Error",
"Ready Serial Timeout Error",
"Ready Serial Attempt Error",
"Recovery Attempt Error",
"Recovery Relock Attempt Error",
"Replay Attempt Error",
"Sync Header Error",
"Tx Replay Timeout Error",
"Rx Replay Timeout Error",
"LinkSub Tx Timeout Error",
"LinkSub Rx Timeout Error",
"Rx CMD Pocket Error",
};
static const char * const smca_xgmiphy_mce_desc[] = {
"RAM ECC Error",
"ARC instruction buffer parity error",
"ARC data buffer parity error",
"PHY APB error",
};
static const char * const smca_waflphy_mce_desc[] = {
"RAM ECC Error",
"ARC instruction buffer parity error",
"ARC data buffer parity error",
"PHY APB error",
};
struct smca_mce_desc {
const char * const *descs;
unsigned int num_descs;
@ -418,6 +483,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
[SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
@ -426,6 +492,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
[SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
[SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
[SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
[SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
};
static bool f12h_mc0_mce(u16 ec, u8 xec)