Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac

* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (21 commits)
  MAINTAINERS: add an entry for Edac Sandy Bridge driver
  edac: tag sb_edac as EXPERIMENTAL, as it requires more testing
  EDAC: Fix incorrect edac mode reporting in sb_edac
  edac: sb_edac: Add it to the building system
  edac: Add an experimental new driver to support Sandy Bridge CPU's
  i7300_edac: Fix error cleanup logic
  i7core_edac: Initialize memory name with cpu, channel, bank
  i7core_edac: Fix compilation on 32 bits arch
  i7core_edac: scrubbing fixups
  EDAC: Correct Kconfig dependencies
  i7core_edac: return -ENODEV if no MC is found
  i7core_edac: use edac's own way to print errors
  MAINTAINERS: remove dropped edac_mce.* from the file
  i7core_edac: Drop the edac_mce facility
  x86, MCE: Use notifier chain only for MCE decoding
  EDAC i7core: Use mce socketid for better compatibility
  i7core_edac: Don't enable memory scrubbing for Xeon 35xx
  i7core_edac: Add scrubbing support
  edac: Move edac main structs to include/linux/edac.h
  i7core_edac: Fix oops when trying to inject errors
  ...
This commit is contained in:
Linus Torvalds 2011-11-02 16:55:15 -07:00
Родитель 06ef93e1b8 4d096ca7e6
Коммит 6681ba7ec4
11 изменённых файлов: 2671 добавлений и 525 удалений

Просмотреть файл

@ -2467,8 +2467,6 @@ L: linux-edac@vger.kernel.org
W: bluesmoke.sourceforge.net W: bluesmoke.sourceforge.net
S: Maintained S: Maintained
F: drivers/edac/i7core_edac.c F: drivers/edac/i7core_edac.c
F: drivers/edac/edac_mce.c
F: include/linux/edac_mce.h
EDAC-I82975X EDAC-I82975X
M: Ranganathan Desikan <ravi@jetztechnologies.com> M: Ranganathan Desikan <ravi@jetztechnologies.com>
@ -2492,6 +2490,13 @@ W: bluesmoke.sourceforge.net
S: Maintained S: Maintained
F: drivers/edac/r82600_edac.c F: drivers/edac/r82600_edac.c
EDAC-SBRIDGE
M: Mauro Carvalho Chehab <mchehab@redhat.com>
L: linux-edac@vger.kernel.org
W: bluesmoke.sourceforge.net
S: Maintained
F: drivers/edac/sb_edac.c
EDIROL UA-101/UA-1000 DRIVER EDIROL UA-101/UA-1000 DRIVER
M: Clemens Ladisch <clemens@ladisch.de> M: Clemens Ladisch <clemens@ladisch.de>
L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: alsa-devel@alsa-project.org (moderated for non-subscribers)

Просмотреть файл

@ -36,7 +36,6 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/edac_mce.h>
#include <linux/irq_work.h> #include <linux/irq_work.h>
#include <asm/processor.h> #include <asm/processor.h>
@ -144,23 +143,20 @@ static struct mce_log mcelog = {
void mce_log(struct mce *mce) void mce_log(struct mce *mce)
{ {
unsigned next, entry; unsigned next, entry;
int ret = 0;
/* Emit the trace record: */ /* Emit the trace record: */
trace_mce_record(mce); trace_mce_record(mce);
ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
if (ret == NOTIFY_STOP)
return;
mce->finished = 0; mce->finished = 0;
wmb(); wmb();
for (;;) { for (;;) {
entry = rcu_dereference_check_mce(mcelog.next); entry = rcu_dereference_check_mce(mcelog.next);
for (;;) { for (;;) {
/*
* If edac_mce is enabled, it will check the error type
* and will process it, if it is a known error.
* Otherwise, the error will be sent through mcelog
* interface
*/
if (edac_mce_parse(mce))
return;
/* /*
* When the buffer fills up discard new entries. * When the buffer fills up discard new entries.
@ -556,10 +552,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to * Don't get the IP here because it's unlikely to
* have anything to do with the actual error location. * have anything to do with the actual error location.
*/ */
if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
mce_log(&m); mce_log(&m);
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
}
/* /*
* Clear state for this bank. * Clear state for this bank.

Просмотреть файл

@ -41,7 +41,7 @@ config EDAC_DEBUG
config EDAC_DECODE_MCE config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)" tristate "Decode MCEs in human-readable form (only on AMD for now)"
depends on CPU_SUP_AMD && X86_MCE depends on CPU_SUP_AMD && X86_MCE_AMD
default y default y
---help--- ---help---
Enable this option if you want to decode Machine Check Exceptions Enable this option if you want to decode Machine Check Exceptions
@ -71,9 +71,6 @@ config EDAC_MM_EDAC
occurred so that a particular failing memory module can be occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'. replaced. If unsure, select 'Y'.
config EDAC_MCE
bool
config EDAC_AMD64 config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h" tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
@ -173,8 +170,7 @@ config EDAC_I5400
config EDAC_I7CORE config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors" tristate "Intel i7 Core (Nehalem) processors"
depends on EDAC_MM_EDAC && PCI && X86 depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
select EDAC_MCE
help help
Support for error detection and correction the Intel Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on i7 Core (Nehalem) Integrated Memory Controller that exists on
@ -216,6 +212,14 @@ config EDAC_I7300
Support for error detection and correction the Intel Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset). Clarksboro MCH (Intel 7300 chipset).
config EDAC_SBRIDGE
tristate "Intel Sandy-Bridge Integrated MC"
depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
depends on EXPERIMENTAL
help
Support for error detection and correction the Intel
Sandy Bridge Integrated Memory Controller.
config EDAC_MPC85XX config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx" tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)

Просмотреть файл

@ -8,7 +8,6 @@
obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
obj-$(CONFIG_EDAC_MCE) += edac_mce.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o
@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o

Просмотреть файл

@ -34,11 +34,10 @@
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/edac.h>
#define EDAC_MC_LABEL_LEN 31
#define EDAC_DEVICE_NAME_LEN 31 #define EDAC_DEVICE_NAME_LEN 31
#define EDAC_ATTRIB_VALUE_LEN 15 #define EDAC_ATTRIB_VALUE_LEN 15
#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20 #if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
@ -101,353 +100,6 @@ extern int edac_debug_level;
#define edac_dev_name(dev) (dev)->dev_name #define edac_dev_name(dev) (dev)->dev_name
/*
 * memory devices
 *
 * Kinds of memory device recorded per chip-select row (csrow_info.dtype).
 * The DEV_Xn names presumably encode the device data width in bits
 * (x4, x8, ...) -- TODO confirm against the drivers that fill in dtype.
 *
 * Enumerators are numbered consecutively from DEV_UNKNOWN = 0, and each
 * value is also the bit position used by the matching DEV_FLAG_* mask
 * defined right after the enum.
 */
enum dev_type {
DEV_UNKNOWN = 0,
DEV_X1,
DEV_X2,
DEV_X4,
DEV_X8,
DEV_X16,
DEV_X32, /* Do these parts exist? */
DEV_X64 /* Do these parts exist? */
};
/* One single-bit mask per dev_type enumerator, built with BIT(). */
#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
#define DEV_FLAG_X1 BIT(DEV_X1)
#define DEV_FLAG_X2 BIT(DEV_X2)
#define DEV_FLAG_X4 BIT(DEV_X4)
#define DEV_FLAG_X8 BIT(DEV_X8)
#define DEV_FLAG_X16 BIT(DEV_X16)
#define DEV_FLAG_X32 BIT(DEV_X32)
#define DEV_FLAG_X64 BIT(DEV_X64)
/*
 * memory types
 *
 * Memory technology of a chip-select row (csrow_info.mtype). Enumerators
 * are numbered consecutively from MEM_EMPTY = 0; every one of them has a
 * matching single-bit MEM_FLAG_* mask below whose bit position is the
 * enumerator value. The masks are presumably what mem_ctl_info.mtype_cap
 * is built from -- confirm against the drivers.
 */
enum mem_type {
MEM_EMPTY = 0, /* Empty csrow */
MEM_RESERVED, /* Reserved csrow type */
MEM_UNKNOWN, /* Unknown csrow type */
MEM_FPM, /* Fast page mode */
MEM_EDO, /* Extended data out */
MEM_BEDO, /* Burst Extended data out */
MEM_SDR, /* Single data rate SDRAM */
MEM_RDR, /* Registered single data rate SDRAM */
MEM_DDR, /* Double data rate SDRAM */
MEM_RDDR, /* Registered Double data rate SDRAM */
MEM_RMBS, /* Rambus DRAM */
MEM_DDR2, /* DDR2 RAM */
MEM_FB_DDR2, /* fully buffered DDR2 */
MEM_RDDR2, /* Registered DDR2 RAM */
MEM_XDR, /* Rambus XDR */
MEM_DDR3, /* DDR3 RAM */
MEM_RDDR3, /* Registered DDR3 RAM */
};
/* One single-bit mask per mem_type enumerator, built with BIT(). */
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
#define MEM_FLAG_FPM BIT(MEM_FPM)
#define MEM_FLAG_EDO BIT(MEM_EDO)
#define MEM_FLAG_BEDO BIT(MEM_BEDO)
#define MEM_FLAG_SDR BIT(MEM_SDR)
#define MEM_FLAG_RDR BIT(MEM_RDR)
#define MEM_FLAG_DDR BIT(MEM_DDR)
#define MEM_FLAG_RDDR BIT(MEM_RDDR)
#define MEM_FLAG_RMBS BIT(MEM_RMBS)
#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
#define MEM_FLAG_XDR BIT(MEM_XDR)
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
/*
 * chipset Error Detection and Correction capabilities and mode
 *
 * Used for the per-row mode (csrow_info.edac_mode). Enumerators are
 * numbered consecutively from EDAC_UNKNOWN = 0, and each value is the bit
 * position of its EDAC_FLAG_* mask below.
 *
 * Note that EDAC_RESERVED is the one enumerator without a corresponding
 * EDAC_FLAG_* macro.
 */
enum edac_type {
EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
EDAC_NONE, /* Doesn't support ECC */
EDAC_RESERVED, /* Reserved ECC type */
EDAC_PARITY, /* Detects parity errors */
EDAC_EC, /* Error Checking - no correction */
EDAC_SECDED, /* Single bit error correction, Double detection */
EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
EDAC_S4ECD4ED, /* Chipkill x4 devices */
EDAC_S8ECD8ED, /* Chipkill x8 devices */
EDAC_S16ECD16ED, /* Chipkill x16 devices */
};
/* Single-bit masks for the edac_type enumerators, built with BIT(). */
#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
#define EDAC_FLAG_NONE BIT(EDAC_NONE)
#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
#define EDAC_FLAG_EC BIT(EDAC_EC)
#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
/*
 * scrubbing capabilities
 *
 * Enumerators are numbered consecutively from SCRUB_UNKNOWN = 0. Each
 * SCRUB_FLAG_* mask below is BIT(<enumerator>), so the enumerator value is
 * also the bit position of its mask. SCRUB_UNKNOWN and SCRUB_NONE have no
 * mask of their own.
 */
enum scrub_type {
	SCRUB_UNKNOWN = 0,	/* Unknown if scrubber is available */
	SCRUB_NONE,		/* No scrubber */
	SCRUB_SW_PROG,		/* SW progressive (sequential) scrubbing */
	SCRUB_SW_SRC,		/* Software scrub only errors */
	SCRUB_SW_PROG_SRC,	/* Progressive software scrub from an error */
	SCRUB_SW_TUNABLE,	/* Software scrub frequency is tunable */
	SCRUB_HW_PROG,		/* HW progressive (sequential) scrubbing */
	SCRUB_HW_SRC,		/* Hardware scrub only errors */
	SCRUB_HW_PROG_SRC,	/* Progressive hardware scrub from an error */
	SCRUB_HW_TUNABLE	/* Hardware scrub frequency is tunable */
};

#define SCRUB_FLAG_SW_PROG	BIT(SCRUB_SW_PROG)
#define SCRUB_FLAG_SW_SRC	BIT(SCRUB_SW_SRC)
#define SCRUB_FLAG_SW_PROG_SRC	BIT(SCRUB_SW_PROG_SRC)
/*
 * Must name the enumerator exactly: the enum declares SCRUB_SW_TUNABLE,
 * there is no SCRUB_SW_SCRUB_TUNABLE, so the old spelling broke the build
 * of any user of this macro.
 */
#define SCRUB_FLAG_SW_TUN	BIT(SCRUB_SW_TUNABLE)
#define SCRUB_FLAG_HW_PROG	BIT(SCRUB_HW_PROG)
#define SCRUB_FLAG_HW_SRC	BIT(SCRUB_HW_SRC)
#define SCRUB_FLAG_HW_PROG_SRC	BIT(SCRUB_HW_PROG_SRC)
#define SCRUB_FLAG_HW_TUN	BIT(SCRUB_HW_TUNABLE)
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
/*
 * EDAC internal operation states
 *
 * Presumably the values held in an instance's op_state field (struct
 * mem_ctl_info has one) -- confirm against the code that assigns them.
 * NOTE(review): the numbering looks like a phase in the upper byte
 * (0x1xx allocated, 0x2xx running, 0x3xx offline); this is inferred from
 * the values only. Numerically, OP_RUNNING_POLL_INTR equals
 * OP_RUNNING_POLL | OP_RUNNING_INTERRUPT.
 */
#define OP_ALLOC 0x100
#define OP_RUNNING_POLL 0x201
#define OP_RUNNING_INTERRUPT 0x202
#define OP_RUNNING_POLL_INTR 0x203
#define OP_OFFLINE 0x300
/*
* There are several things to be aware of that aren't at all obvious:
*
*
* SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
*
* These are some of the many terms that are thrown about that don't always
* mean what people think they mean (Inconceivable!). In the interest of
* creating a common ground for discussion, terms and their definitions
* will be established.
*
* Memory devices: The individual chip on a memory stick. These devices
* commonly output 4 and 8 bits each. Grouping several
* of these in parallel provides 64 bits which is common
* for a memory stick.
*
* Memory Stick: A printed circuit board that aggregates multiple
* memory devices in parallel. This is the atomic
* memory component that is purchasable by Joe consumer
* and loaded into a memory socket.
*
* Socket: A physical connector on the motherboard that accepts
* a single memory stick.
*
* Channel: Set of memory devices on a memory stick that must be
* grouped in parallel with one or more additional
* channels from other memory sticks. This parallel
* grouping of the output from multiple channels is
* necessary for the smallest granularity of memory access.
* Some memory controllers are capable of single channel -
* which means that memory sticks can be loaded
* individually. Other memory controllers are only
* capable of dual channel - which means that memory
* sticks must be loaded as pairs (see "socket set").
*
* Chip-select row: All of the memory devices that are selected together
* for a single, minimum grain of memory access.
* This selects all of the parallel memory devices across
* all of the parallel channels. Common chip-select rows
* for single channel are 64 bits, for dual channel 128
* bits.
*
* Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
* Motherboards commonly drive two chip-select pins to
* a memory stick. A single-ranked stick will occupy
* only one of those rows. The other will be unused.
*
* Double-Ranked stick: A double-ranked stick has two chip-select rows which
* access different sets of memory devices. The two
* rows cannot be accessed concurrently.
*
* Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
* A double-sided stick has two chip-select rows which
* access different sets of memory devices. The two
* rows cannot be accessed concurrently. "Double-sided"
* is irrespective of the memory devices being mounted
* on both sides of the memory stick.
*
* Socket set: All of the memory sticks that are required for
* a single memory access or all of the memory sticks
* spanned by a chip-select row. A single socket set
* has two chip-select rows and if double-sided sticks
* are used these will occupy those chip-select rows.
*
* Bank: This term is avoided because it is unclear when
* needing to distinguish between chip-select rows and
* socket sets.
*
* Controller pages:
*
* Physical pages:
*
* Virtual pages:
*
*
* STRUCTURE ORGANIZATION AND CHOICES
*
*
*
* PS - I enjoyed writing all that about as much as you enjoyed reading it.
*/
/*
 * Per-channel state inside one chip-select row: the channel's index, its
 * correctable-error counter, the DIMM label and a back pointer to the
 * owning csrow_info.
 */
struct channel_info {
int chan_idx; /* channel index */
u32 ce_count; /* Correctable Errors for this CHANNEL */
/* EDAC_MC_LABEL_LEN characters plus one byte, presumably for the NUL */
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
struct csrow_info *csrow; /* the parent */
};
/*
 * Per chip-select-row state: the page range covered by the row, its
 * device/memory/EDAC type, error counters, the sysfs kobject and the
 * array of channels that make up the row.
 */
struct csrow_info {
unsigned long first_page; /* first page number in dimm */
unsigned long last_page; /* last page number in dimm */
unsigned long page_mask; /* used for interleaving -
* 0UL for non intlv
*/
u32 nr_pages; /* number of pages in csrow */
u32 grain; /* granularity of reported error in bytes */
int csrow_idx; /* the chip-select row */
enum dev_type dtype; /* memory device type */
u32 ue_count; /* Uncorrectable Errors for this csrow */
u32 ce_count; /* Correctable Errors for this csrow */
enum mem_type mtype; /* memory csrow type */
enum edac_type edac_mode; /* EDAC mode for this csrow */
struct mem_ctl_info *mci; /* the parent */
struct kobject kobj; /* sysfs kobject for this csrow */
/* channel information for this csrow */
u32 nr_channels; /* presumably the element count of channels[] */
struct channel_info *channels;
};
/*
 * Description of a named group of memory-controller sysfs attributes:
 * the group name plus a pointer to its mcidev_sysfs_attribute entries.
 */
struct mcidev_sysfs_group {
const char *name; /* group name */
const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
};
/*
 * One instantiated attribute group: ties a mcidev_sysfs_group description
 * to its own kobject and to the memory controller it belongs to. Instances
 * are chained through 'list' (mem_ctl_info carries a grp_kobj_list head
 * described as holding all group instances of a controller).
 */
struct mcidev_sysfs_group_kobj {
struct list_head list; /* list for all instances within a mc */
struct kobject kobj; /* kobj for the group */
const struct mcidev_sysfs_group *grp; /* group description table */
struct mem_ctl_info *mci; /* the parent */
};
/* mcidev_sysfs_attribute structure
* used for driver sysfs attributes and in mem_ctl_info
* sysfs top level entries
*
* An entry describes either a plain attribute ('attr' with the show/store
* callbacks) or a nested group ('grp'); per the comments below the two are
* alternatives and the callbacks are not used for a group entry.
*/
struct mcidev_sysfs_attribute {
/* It should use either attr or grp */
struct attribute attr;
const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
/* Ops for show/store values at the attribute - not used on group */
/* show: receives the controller and an output buffer */
ssize_t (*show)(struct mem_ctl_info *,char *);
/* store: receives the controller, the input buffer and a size_t count */
ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
};
/* MEMORY controller information structure
*
* One instance per memory controller. Groups the capability masks, the
* optional driver callbacks (scrub rate get/set, polling check, page
* remapping), the csrow array, identification strings, error counters and
* the sysfs/workqueue bookkeeping for that controller.
*/
struct mem_ctl_info {
struct list_head link; /* for global list of mem_ctl_info structs */
struct module *owner; /* Module owner of this control struct */
unsigned long mtype_cap; /* memory types supported by mc */
unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
unsigned long edac_cap; /* configuration capabilities - this is
* closely related to edac_ctl_cap. The
* difference is that the controller may be
* capable of s4ecd4ed which would be listed
* in edac_ctl_cap, but if channels aren't
* capable of s4ecd4ed then the edac_cap would
* not have that capability.
*/
unsigned long scrub_cap; /* chipset scrub capabilities */
enum scrub_type scrub_mode; /* current scrub mode */
/* Translates sdram memory scrub rate given in bytes/sec to the
internal representation and configures whatever else needs
to be configured.
*/
int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
/* Get the current sdram memory scrub rate from the internal
representation and converts it to the closest matching
bandwidth in bytes/sec.
*/
int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
/* pointer to edac checking routine */
void (*edac_check) (struct mem_ctl_info * mci);
/*
* Remaps memory pages: controller pages to physical pages.
* For most MC's, this will be NULL.
*/
/* FIXME - why not send the phys page to begin with? */
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
unsigned long page);
int mc_idx;
/* presumably the element count of csrows[] -- confirm at the allocator */
int nr_csrows;
struct csrow_info *csrows;
/*
* FIXME - what about controllers on other busses? - IDs must be
* unique. dev pointer should be sufficiently unique, but
* BUS:SLOT.FUNC numbers may not be unique.
*/
struct device *dev;
const char *mod_name;
const char *mod_ver;
const char *ctl_name;
const char *dev_name;
/* MC_PROC_NAME_MAX_LEN characters plus one byte, presumably for the NUL */
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
/* opaque pointer; presumably the low level driver's private data */
void *pvt_info;
u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
u32 ce_noinfo_count; /* Correctable Errors w/o info */
u32 ue_count; /* Total Uncorrectable Errors for this MC */
u32 ce_count; /* Total Correctable Errors for this MC */
unsigned long start_time; /* mci load start time (in jiffies) */
struct completion complete;
/* edac sysfs device control */
struct kobject edac_mci_kobj;
/* list for all grp instances within a mc */
struct list_head grp_kobj_list;
/* Additional top controller level attributes, but specified
* by the low level driver.
*
* Set by the low level driver to provide attributes at the
* controller level, same level as 'ue_count' and 'ce_count' above.
* An array of structures, NULL terminated
*
* If attributes are desired, then set to array of attributes
* If no attributes are desired, leave NULL
*/
const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
/* work struct for this MC */
struct delayed_work work;
/* the internal state of this controller instance */
int op_state;
};
/* /*
* The following are the structures to provide for a generic * The following are the structures to provide for a generic
* or abstract 'edac_device'. This set of structures and the * or abstract 'edac_device'. This set of structures and the

Просмотреть файл

@ -1,61 +0,0 @@
/* Provides edac interface to mcelog events
*
* This file may be distributed under the terms of the
* GNU General Public License version 2.
*
* Copyright (c) 2009 by:
* Mauro Carvalho Chehab <mchehab@redhat.com>
*
* Red Hat Inc. http://www.redhat.com
*/
#include <linux/module.h>
#include <linux/edac_mce.h>
#include <asm/mce.h>
/*
 * Exported flag. Nothing in this file reads or writes it; it is only
 * defined and exported here for other code to use.
 */
int edac_mce_enabled;
EXPORT_SYMBOL_GPL(edac_mce_enabled);
/*
* Extension interface
*
* edac_mce_list holds the registered struct edac_mce handlers;
* edac_mce_lock is taken around list insertion and removal.
*/
static LIST_HEAD(edac_mce_list);
static DEFINE_MUTEX(edac_mce_lock);
/*
 * edac_mce_register - add a handler to the tail of edac_mce_list.
 * @edac_mce: handler to add; its 'list' member is linked into the list.
 *
 * The insertion is done with edac_mce_lock held. No validation or
 * duplicate check is performed.
 *
 * Returns 0 unconditionally.
 */
int edac_mce_register(struct edac_mce *edac_mce)
{
mutex_lock(&edac_mce_lock);
list_add_tail(&edac_mce->list, &edac_mce_list);
mutex_unlock(&edac_mce_lock);
return 0;
}
EXPORT_SYMBOL(edac_mce_register);
/*
 * edac_mce_unregister - unlink a handler from the handler list.
 * @edac_mce: handler to remove; presumably one previously passed to
 *            edac_mce_register() -- nothing here checks that.
 *
 * The removal is done with edac_mce_lock held.
 */
void edac_mce_unregister(struct edac_mce *edac_mce)
{
mutex_lock(&edac_mce_lock);
list_del(&edac_mce->list);
mutex_unlock(&edac_mce_lock);
}
EXPORT_SYMBOL(edac_mce_unregister);
/*
 * edac_mce_parse - offer an MCE record to the registered handlers.
 * @mce: record handed to each handler's check_error() callback together
 *       with that handler's 'priv' pointer.
 *
 * Handlers are tried in list order and the walk ends at the first one
 * whose check_error() returns nonzero.
 *
 * NOTE(review): the list is walked without taking edac_mce_lock.
 *
 * Returns 1 if some handler returned nonzero, 0 if none did (including
 * when no handler is registered).
 */
int edac_mce_parse(struct mce *mce)
{
	struct edac_mce *handler;
	int claimed = 0;

	list_for_each_entry(handler, &edac_mce_list, list) {
		if (!handler->check_error(handler->priv, mce))
			continue;

		claimed = 1;
		break;
	}

	/* claimed == 0: nobody queued the error */
	return claimed;
}
EXPORT_SYMBOL_GPL(edac_mce_parse);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");

Просмотреть файл

@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
static void i7300_process_error_global(struct mem_ctl_info *mci) static void i7300_process_error_global(struct mem_ctl_info *mci)
{ {
struct i7300_pvt *pvt; struct i7300_pvt *pvt;
u32 errnum, value; u32 errnum, error_reg;
unsigned long errors; unsigned long errors;
const char *specific; const char *specific;
bool is_fatal; bool is_fatal;
@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */ /* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
FERR_GLOBAL_HI, &value); FERR_GLOBAL_HI, &error_reg);
if (unlikely(value)) { if (unlikely(error_reg)) {
errors = value; errors = error_reg;
errnum = find_first_bit(&errors, errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_hi_name)); ARRAY_SIZE(ferr_global_hi_name));
specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */ /* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
FERR_GLOBAL_HI, value); FERR_GLOBAL_HI, error_reg);
goto error_global; goto error_global;
} }
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
FERR_GLOBAL_LO, &value); FERR_GLOBAL_LO, &error_reg);
if (unlikely(value)) { if (unlikely(error_reg)) {
errors = value; errors = error_reg;
errnum = find_first_bit(&errors, errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_lo_name)); ARRAY_SIZE(ferr_global_lo_name));
specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */ /* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
FERR_GLOBAL_LO, value); FERR_GLOBAL_LO, error_reg);
goto error_global; goto error_global;
} }
@ -427,7 +427,7 @@ error_global:
static void i7300_process_fbd_error(struct mem_ctl_info *mci) static void i7300_process_fbd_error(struct mem_ctl_info *mci)
{ {
struct i7300_pvt *pvt; struct i7300_pvt *pvt;
u32 errnum, value; u32 errnum, value, error_reg;
u16 val16; u16 val16;
unsigned branch, channel, bank, rank, cas, ras; unsigned branch, channel, bank, rank, cas, ras;
u32 syndrome; u32 syndrome;
@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */ /* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
FERR_FAT_FBD, &value); FERR_FAT_FBD, &error_reg);
if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) { if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
errors = value & FERR_FAT_FBD_ERR_MASK ; errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
errnum = find_first_bit(&errors, errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_fat_fbd_name)); ARRAY_SIZE(ferr_fat_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMA, &val16); NRECMEMA, &val16);
bank = NRECMEMA_BANK(val16); bank = NRECMEMA_BANK(val16);
@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMB, &value); NRECMEMB, &value);
is_wr = NRECMEMB_IS_WR(value); is_wr = NRECMEMB_IS_WR(value);
cas = NRECMEMB_CAS(value); cas = NRECMEMB_CAS(value);
ras = NRECMEMB_RAS(value); ras = NRECMEMB_RAS(value);
/* Clean the error register */
pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
FERR_FAT_FBD, error_reg);
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"FATAL (Branch=%d DRAM-Bank=%d %s " "FATAL (Branch=%d DRAM-Bank=%d %s "
"RAS=%d CAS=%d Err=0x%lx (%s))", "RAS=%d CAS=%d Err=0x%lx (%s))",
@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st NON-FATAL error register */ /* read in the 1st NON-FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
FERR_NF_FBD, &value); FERR_NF_FBD, &error_reg);
if (unlikely(value & FERR_NF_FBD_ERR_MASK)) { if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
errors = value & FERR_NF_FBD_ERR_MASK; errors = error_reg & FERR_NF_FBD_ERR_MASK;
errnum = find_first_bit(&errors, errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_nf_fbd_name)); ARRAY_SIZE(ferr_nf_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
FERR_GLOBAL_LO, value);
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMA, &syndrome); REDMEMA, &syndrome);
branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMA, &val16); RECMEMA, &val16);
bank = RECMEMA_BANK(val16); bank = RECMEMA_BANK(val16);
@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMB, &value); RECMEMB, &value);
is_wr = RECMEMB_IS_WR(value); is_wr = RECMEMB_IS_WR(value);
cas = RECMEMB_CAS(value); cas = RECMEMB_CAS(value);
ras = RECMEMB_RAS(value); ras = RECMEMB_RAS(value);
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMB, &value); REDMEMB, &value);
channel = (branch << 1); channel = (branch << 1);
if (IS_SECOND_CH(value)) if (IS_SECOND_CH(value))
channel++; channel++;
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
FERR_NF_FBD, error_reg);
/* Form out message */ /* Form out message */
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"Corrected error (Branch=%d, Channel %d), " "Corrected error (Branch=%d, Channel %d), "

Просмотреть файл

@ -31,11 +31,13 @@
#include <linux/pci_ids.h> #include <linux/pci_ids.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h> #include <linux/edac.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
#include <linux/edac_mce.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/div64.h>
#include "edac_core.h" #include "edac_core.h"
@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/* OFFSETS for Device 0 Function 0 */ /* OFFSETS for Device 0 Function 0 */
#define MC_CFG_CONTROL 0x90 #define MC_CFG_CONTROL 0x90
#define MC_CFG_UNLOCK 0x02
#define MC_CFG_LOCK 0x00
/* OFFSETS for Device 3 Function 0 */ /* OFFSETS for Device 3 Function 0 */
@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
#define DIMM0_COR_ERR(r) ((r) & 0x7fff) #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */ /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_SSRCONTROL 0x48
#define SSR_MODE_DISABLE 0x00
#define SSR_MODE_ENABLE 0x01
#define SSR_MODE_MASK 0x03
#define MC_SCRUB_CONTROL 0x4c
#define STARTSCRUB (1 << 24)
#define SCRUBINTERVAL_MASK 0xffffff
#define MC_COR_ECC_CNT_0 0x80 #define MC_COR_ECC_CNT_0 0x80
#define MC_COR_ECC_CNT_1 0x84 #define MC_COR_ECC_CNT_1 0x84
#define MC_COR_ECC_CNT_2 0x88 #define MC_COR_ECC_CNT_2 0x88
@ -253,10 +266,7 @@ struct i7core_pvt {
unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
unsigned int is_registered; bool is_registered, enable_scrub;
/* mcelog glue */
struct edac_mce edac_mce;
/* Fifo double buffers */ /* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN]; struct mce mce_entry[MCE_LOG_LEN];
@ -268,6 +278,9 @@ struct i7core_pvt {
/* Count indicator to show errors not got */ /* Count indicator to show errors not got */
unsigned mce_overrun; unsigned mce_overrun;
/* DCLK Frequency used for computing scrub rate */
int dclk_freq;
/* Struct to control EDAC polling */ /* Struct to control EDAC polling */
struct edac_pci_ctl_info *i7core_pci; struct edac_pci_ctl_info *i7core_pci;
}; };
@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
/* Memory controller */ /* Memory controller */
{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
/* Exists only for RDIMM */
/* Exists only for RDIMM */
{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
/* Generic Non-core registers */
/*
* This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
* On Xeon 55xx, however, it has a different id (8086:2c40). So,
* the probing code needs to test for the other address in case of
* failure of this one
*/
{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
}; };
static const struct pci_id_descr pci_dev_descr_lynnfield[] = { static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
/*
* This PCI device has an alternate address on some
* processors, such as the Core i7 860
*/
{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
}; };
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
/* Generic Non-core registers */
{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
}; };
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
csr->edac_mode = mode; csr->edac_mode = mode;
csr->mtype = mtype; csr->mtype = mtype;
snprintf(csr->channels[0].label,
sizeof(csr->channels[0].label),
"CPU#%uChannel#%u_DIMM#%u",
pvt->i7core_dev->socket, i, j);
csrow++; csrow++;
} }
@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
debugf1("\t\t%#x\t%#x\t%#x\n", debugf1("\t\t%#x\t%#x\t%#x\n",
(value[j] >> 27) & 0x1, (value[j] >> 27) & 0x1,
(value[j] >> 24) & 0x7, (value[j] >> 24) & 0x7,
(value[j] && ((1 << 24) - 1))); (value[j] & ((1 << 24) - 1)));
} }
return 0; return 0;
@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev,
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
dev_descr->dev_id, *prev); dev_descr->dev_id, *prev);
/*
* On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
* are at addr 8086:2c40, instead of 8086:2c41. So, we need
* to probe for the alternate address in case of failure
*/
if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
*prev);
if (!pdev) { if (!pdev) {
if (*prev) { if (*prev) {
*prev = pdev; *prev = pdev;
@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
struct i7core_pvt *pvt = mci->pvt_info; struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev; struct pci_dev *pdev;
int i, func, slot; int i, func, slot;
char *family;
pvt->is_registered = 0; pvt->is_registered = false;
pvt->enable_scrub = false;
for (i = 0; i < i7core_dev->n_devs; i++) { for (i = 0; i < i7core_dev->n_devs; i++) {
pdev = i7core_dev->pdev[i]; pdev = i7core_dev->pdev[i];
if (!pdev) if (!pdev)
@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
if (unlikely(func > MAX_CHAN_FUNC)) if (unlikely(func > MAX_CHAN_FUNC))
goto error; goto error;
pvt->pci_ch[slot - 4][func] = pdev; pvt->pci_ch[slot - 4][func] = pdev;
} else if (!slot && !func) } else if (!slot && !func) {
pvt->pci_noncore = pdev; pvt->pci_noncore = pdev;
else
/* Detect the processor family */
switch (pdev->device) {
case PCI_DEVICE_ID_INTEL_I7_NONCORE:
family = "Xeon 35xx/ i7core";
pvt->enable_scrub = false;
break;
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
family = "i7-800/i5-700";
pvt->enable_scrub = false;
break;
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
family = "Xeon 34xx";
pvt->enable_scrub = false;
break;
case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
family = "Xeon 55xx";
pvt->enable_scrub = true;
break;
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
family = "Xeon 56xx / i7-900";
pvt->enable_scrub = true;
break;
default:
family = "unknown";
pvt->enable_scrub = false;
}
debugf0("Detected a processor type %s\n", family);
} else
goto error; goto error;
debugf0("Associated fn %d.%d, dev = %p, socket %d\n", debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
if (PCI_SLOT(pdev->devfn) == 3 && if (PCI_SLOT(pdev->devfn) == 3 &&
PCI_FUNC(pdev->devfn) == 2) PCI_FUNC(pdev->devfn) == 2)
pvt->is_registered = 1; pvt->is_registered = true;
} }
return 0; return 0;
@ -1826,33 +1906,43 @@ check_ce_error:
* WARNING: As this routine should be called at NMI time, extra care should * WARNING: As this routine should be called at NMI time, extra care should
* be taken to avoid deadlocks, and to be as fast as possible. * be taken to avoid deadlocks, and to be as fast as possible.
*/ */
static int i7core_mce_check_error(void *priv, struct mce *mce) static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{ {
struct mem_ctl_info *mci = priv; struct mce *mce = (struct mce *)data;
struct i7core_pvt *pvt = mci->pvt_info; struct i7core_dev *i7_dev;
struct mem_ctl_info *mci;
struct i7core_pvt *pvt;
i7_dev = get_i7core_dev(mce->socketid);
if (!i7_dev)
return NOTIFY_BAD;
mci = i7_dev->mci;
pvt = mci->pvt_info;
/* /*
* Just let mcelog handle it if the error is * Just let mcelog handle it if the error is
* outside the memory controller * outside the memory controller
*/ */
if (((mce->status & 0xffff) >> 7) != 1) if (((mce->status & 0xffff) >> 7) != 1)
return 0; return NOTIFY_DONE;
/* Bank 8 registers are the only ones that we know how to handle */ /* Bank 8 registers are the only ones that we know how to handle */
if (mce->bank != 8) if (mce->bank != 8)
return 0; return NOTIFY_DONE;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* Only handle if it is the right mc controller */ /* Only handle if it is the right mc controller */
if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) if (mce->socketid != pvt->i7core_dev->socket)
return 0; return NOTIFY_DONE;
#endif #endif
smp_rmb(); smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb(); smp_wmb();
pvt->mce_overrun++; pvt->mce_overrun++;
return 0; return NOTIFY_DONE;
} }
/* Copy memory error at the ringbuffer */ /* Copy memory error at the ringbuffer */
@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
i7core_check_error(mci); i7core_check_error(mci);
/* Advise mcelog that the errors were handled */ /* Advise mcelog that the errors were handled */
return 1; return NOTIFY_STOP;
}
/*
 * Notifier block whose callback is i7core_mce_check_error(). This is the
 * object handed to atomic_notifier_chain_register() and
 * atomic_notifier_chain_unregister() on x86_mce_decoder_chain when a memory
 * controller is registered or unregistered.
 */
static struct notifier_block i7_mce_dec = {
.notifier_call = i7core_mce_check_error,
};
/*
 * Field-by-field overlay for a DMI memory-device record. decode_dclk() casts
 * a struct dmi_header whose type is DMI_ENTRY_MEM_DEVICE to this layout and
 * compares member offsets against the record's own 'length' byte, which is
 * why the structure is packed.
 * NOTE(review): the field order presumably mirrors the SMBIOS "Memory Device"
 * structure - confirm against the SMBIOS revision being targeted.
 */
struct memdev_dmi_entry {
u8 type;
u8 length; /* record length; decode_dclk() checks it against field offsets */
u16 handle;
u16 phys_mem_array_handle;
u16 mem_err_info_handle;
u16 total_width;
u16 data_width;
u16 size; /* decode_dclk() treats 0 as "no DIMM present" */
u8 form;
u8 device_set;
u8 device_locator;
u8 bank_locator;
u8 memory_type;
u16 type_detail;
u16 speed; /* fallback speed source in decode_dclk() */
u8 manufacturer;
u8 serial_number;
u8 asset_tag;
u8 part_number;
u8 attributes;
u32 extended_size;
u16 conf_mem_clk_speed; /* preferred speed source in decode_dclk() */
} __attribute__((__packed__));
/*
* Decode the DRAM Clock Frequency, be paranoid, make sure that all
* memory devices show the same speed, and if they don't then consider
* all speeds to be invalid.
*/
static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
{
int *dclk_freq = _dclk_freq;
u16 dmi_mem_clk_speed;
if (*dclk_freq == -1)
return;
if (dh->type == DMI_ENTRY_MEM_DEVICE) {
struct memdev_dmi_entry *memdev_dmi_entry =
(struct memdev_dmi_entry *)dh;
unsigned long conf_mem_clk_speed_offset =
(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
(unsigned long)&memdev_dmi_entry->type;
unsigned long speed_offset =
(unsigned long)&memdev_dmi_entry->speed -
(unsigned long)&memdev_dmi_entry->type;
/* Check that a DIMM is present */
if (memdev_dmi_entry->size == 0)
return;
/*
* Pick the configured speed if it's available, otherwise
* pick the DIMM speed, or we don't have a speed.
*/
if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
dmi_mem_clk_speed =
memdev_dmi_entry->conf_mem_clk_speed;
} else if (memdev_dmi_entry->length > speed_offset) {
dmi_mem_clk_speed = memdev_dmi_entry->speed;
} else {
*dclk_freq = -1;
return;
}
if (*dclk_freq == 0) {
/* First pass, speed was 0 */
if (dmi_mem_clk_speed > 0) {
/* Set speed if a valid speed is read */
*dclk_freq = dmi_mem_clk_speed;
} else {
/* Otherwise we don't have a valid speed */
*dclk_freq = -1;
}
} else if (*dclk_freq > 0 &&
*dclk_freq != dmi_mem_clk_speed) {
/*
* If we have a speed, check that all DIMMS are the same
* speed, otherwise set the speed as invalid.
*/
*dclk_freq = -1;
}
}
}
/*
 * Fallback DCLK frequency, used when the DMI walk does not yield a
 * trustworthy value. The number is taken straight from the datasheet.
 */
#define DEFAULT_DCLK_FREQ 800

/*
 * Walk the DMI table with decode_dclk() and return the frequency it settled
 * on; a result below 1 (nothing found, or marked invalid) is replaced by
 * DEFAULT_DCLK_FREQ.
 */
static int get_dclk_freq(void)
{
	int freq = 0;

	dmi_walk(decode_dclk, (void *)&freq);

	return freq >= 1 ? freq : DEFAULT_DCLK_FREQ;
}
/*
 * set_sdram_scrub_rate - program the patrol scrubber for a byte/sec bandwidth
 * @mci:    memory controller; its pvt_info supplies the PCI device
 *          (pci_mcr[2]) and the DCLK frequency
 * @new_bw: requested bandwidth in bytes/sec, 0 to switch scrubbing off
 *
 * The bandwidth is translated into a SCRUBINTERVAL register value following
 * the formula found in the datasheet.
 *
 * Returns @new_bw on success, -ENODEV when pci_mcr[2] is not bound and
 * -EINVAL when the computed interval is zero or exceeds SCRUBINTERVAL_MASK.
 */
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev = pvt->pci_mcr[2];	/* MC register, function 2 */
	u32 scrub_ctl;
	u32 ssr;
	u32 ssr_mode;

	if (!pdev)
		return -ENODEV;

	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrub_ctl);

	if (new_bw != 0) {
		const int line_bytes = 64;
		const u32 dclk_mhz = pvt->dclk_freq;
		unsigned long long interval;

		/* interval = DCLK [Hz] * cache line size / bandwidth */
		interval = (unsigned long long)dclk_mhz * line_bytes * 1000000;
		do_div(interval, new_bw);

		if (interval == 0 || interval > SCRUBINTERVAL_MASK)
			return -EINVAL;

		scrub_ctl = SCRUBINTERVAL_MASK & interval;

		/* Start the patrol scrub engine */
		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
				       STARTSCRUB | scrub_ctl);

		ssr_mode = SSR_MODE_ENABLE;
	} else {
		/* Stop the patrol scrub engine and clear its interval */
		scrub_ctl &= ~STARTSCRUB;
		write_and_test(pdev, MC_SCRUB_CONTROL,
			       scrub_ctl & ~SCRUBINTERVAL_MASK);

		ssr_mode = SSR_MODE_DISABLE;
	}

	/* Replace the mode field of MC_SSRCONTROL with the chosen mode */
	pci_read_config_dword(pdev, MC_SSRCONTROL, &ssr);
	ssr &= ~SSR_MODE_MASK;
	ssr |= ssr_mode;
	pci_write_config_dword(pdev, MC_SSRCONTROL, ssr);

	return new_bw;
}
/*
 * get_sdram_scrub_rate - report the current scrub rate as byte/sec bandwidth
 * @mci: memory controller; its pvt_info supplies the PCI device (pci_mcr[2])
 *       and the DCLK frequency
 *
 * Inverse of the SCRUBINTERVAL formula found in the datasheet.
 *
 * Returns -ENODEV when pci_mcr[2] is not bound, 0 when the interval field of
 * MC_SCRUB_CONTROL is zero, otherwise the computed bandwidth cast to int.
 */
static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev = pvt->pci_mcr[2];	/* MC register, function 2 */
	const u32 line_bytes = 64;
	const u32 dclk_mhz = pvt->dclk_freq;
	unsigned long long bw;
	u32 interval;

	if (!pdev)
		return -ENODEV;

	/* Keep only the interval field of the scrub control register */
	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &interval);
	interval &= SCRUBINTERVAL_MASK;
	if (interval == 0)
		return 0;

	/* bandwidth = DCLK [Hz] * cache line size / interval */
	bw = (unsigned long long)dclk_mhz * 1000000 * line_bytes;
	do_div(bw, interval);

	return (int)bw;
}
/*
 * Unlock writes to the PCI registers through MC_CFG_CONTROL on the non-core
 * device, then install the scrub-rate callbacks on @mci.
 */
static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *noncore = pvt->pci_noncore;
	u32 cfg;

	/* Replace the two low bits of MC_CFG_CONTROL with the unlock value */
	pci_read_config_dword(noncore, MC_CFG_CONTROL, &cfg);
	cfg &= ~0x3;
	cfg |= MC_CFG_UNLOCK;
	pci_write_config_dword(noncore, MC_CFG_CONTROL, cfg);

	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
}
/*
 * Lock writes to the PCI registers again: read MC_CFG_CONTROL from the
 * non-core device, clear its two low bits and write the value back OR-ed
 * with MC_CFG_LOCK. The scrub-rate callbacks on @mci are not touched here.
 */
static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
struct i7core_pvt *pvt = mci->pvt_info;
u32 pci_lock;
/* Lock writes to pci registers */
pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
/* drop the current value of bits 1:0 before OR-ing in MC_CFG_LOCK */
pci_lock &= ~0x3;
pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
pci_lock | MC_CFG_LOCK);
} }
static void i7core_pci_ctl_create(struct i7core_pvt *pvt) static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
@ -1874,7 +2197,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
&pvt->i7core_dev->pdev[0]->dev, &pvt->i7core_dev->pdev[0]->dev,
EDAC_MOD_STR); EDAC_MOD_STR);
if (unlikely(!pvt->i7core_pci)) if (unlikely(!pvt->i7core_pci))
pr_warn("Unable to setup PCI error report via EDAC\n"); i7core_printk(KERN_WARNING,
"Unable to setup PCI error report via EDAC\n");
} }
static void i7core_pci_ctl_release(struct i7core_pvt *pvt) static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
@ -1906,8 +2230,11 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
__func__, mci, &i7core_dev->pdev[0]->dev); __func__, mci, &i7core_dev->pdev[0]->dev);
/* Disable MCE NMI handler */ /* Disable scrubrate setting */
edac_mce_unregister(&pvt->edac_mce); if (pvt->enable_scrub)
disable_sdram_scrub_setting(mci);
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
/* Disable EDAC polling */ /* Disable EDAC polling */
i7core_pci_ctl_release(pvt); i7core_pci_ctl_release(pvt);
@ -1979,6 +2306,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
/* Set the function pointer to an actual operation function */ /* Set the function pointer to an actual operation function */
mci->edac_check = i7core_check_error; mci->edac_check = i7core_check_error;
/* Enable scrubrate setting */
if (pvt->enable_scrub)
enable_sdram_scrub_setting(mci);
/* add this new MC control structure to EDAC's list of MCs */ /* add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) { if (unlikely(edac_mc_add_mc(mci))) {
debugf0("MC: " __FILE__ debugf0("MC: " __FILE__
@ -2002,21 +2333,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
/* allocating generic PCI control info */ /* allocating generic PCI control info */
i7core_pci_ctl_create(pvt); i7core_pci_ctl_create(pvt);
/* Registers on edac_mce in order to receive memory errors */ /* DCLK for scrub rate setting */
pvt->edac_mce.priv = mci; pvt->dclk_freq = get_dclk_freq();
pvt->edac_mce.check_error = i7core_mce_check_error;
rc = edac_mce_register(&pvt->edac_mce); atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
if (unlikely(rc < 0)) {
debugf0("MC: " __FILE__
": %s(): failed edac_mce_register()\n", __func__);
goto fail1;
}
return 0; return 0;
fail1:
i7core_pci_ctl_release(pvt);
edac_mc_del_mc(mci->dev);
fail0: fail0:
kfree(mci->ctl_name); kfree(mci->ctl_name);
edac_mc_free(mci); edac_mc_free(mci);
@ -2035,7 +2358,7 @@ fail0:
static int __devinit i7core_probe(struct pci_dev *pdev, static int __devinit i7core_probe(struct pci_dev *pdev,
const struct pci_device_id *id) const struct pci_device_id *id)
{ {
int rc; int rc, count = 0;
struct i7core_dev *i7core_dev; struct i7core_dev *i7core_dev;
/* get the pci devices we want to reserve for our use */ /* get the pci devices we want to reserve for our use */
@ -2055,12 +2378,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
goto fail0; goto fail0;
list_for_each_entry(i7core_dev, &i7core_edac_list, list) { list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
count++;
rc = i7core_register_mci(i7core_dev); rc = i7core_register_mci(i7core_dev);
if (unlikely(rc < 0)) if (unlikely(rc < 0))
goto fail1; goto fail1;
} }
i7core_printk(KERN_INFO, "Driver loaded.\n"); /*
* Nehalem-EX uses a different memory controller. However, as the
* memory controller is not visible on some Nehalem/Nehalem-EP, we
* need to indirectly probe via a X58 PCI device. The same devices
* are found on (some) Nehalem-EX. So, on those machines, the
* probe routine needs to return -ENODEV, as the actual Memory
* Controller registers won't be detected.
*/
if (!count) {
rc = -ENODEV;
goto fail1;
}
i7core_printk(KERN_INFO,
"Driver loaded, %d memory controller(s) found.\n",
count);
mutex_unlock(&i7core_edac_lock); mutex_unlock(&i7core_edac_lock);
return 0; return 0;

1893
drivers/edac/sb_edac.c Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -42,4 +42,354 @@ static inline void opstate_init(void)
return; return;
} }
/*
 * Sizes for the fixed character arrays below: channel_info.label is declared
 * with EDAC_MC_LABEL_LEN + 1 bytes and mem_ctl_info.proc_name with
 * MC_PROC_NAME_MAX_LEN + 1 bytes (the extra byte is presumably for the
 * terminating NUL).
 */
#define EDAC_MC_LABEL_LEN 31
#define MC_PROC_NAME_MAX_LEN 7
/* memory devices */
enum dev_type {
	DEV_UNKNOWN	= 0,
	DEV_X1		= 1,
	DEV_X2		= 2,
	DEV_X4		= 3,
	DEV_X8		= 4,
	DEV_X16		= 5,
	DEV_X32		= 6,	/* Do these parts exist? */
	DEV_X64		= 7,	/* Do these parts exist? */
};

/* One flag bit per enum dev_type value, at the position of that value */
#define DEV_FLAG_UNKNOWN	BIT(DEV_UNKNOWN)
#define DEV_FLAG_X1		BIT(DEV_X1)
#define DEV_FLAG_X2		BIT(DEV_X2)
#define DEV_FLAG_X4		BIT(DEV_X4)
#define DEV_FLAG_X8		BIT(DEV_X8)
#define DEV_FLAG_X16		BIT(DEV_X16)
#define DEV_FLAG_X32		BIT(DEV_X32)
#define DEV_FLAG_X64		BIT(DEV_X64)
/* memory types */
enum mem_type {
	MEM_EMPTY	= 0,	/* Empty csrow */
	MEM_RESERVED	= 1,	/* Reserved csrow type */
	MEM_UNKNOWN	= 2,	/* Unknown csrow type */
	MEM_FPM		= 3,	/* Fast page mode */
	MEM_EDO		= 4,	/* Extended data out */
	MEM_BEDO	= 5,	/* Burst Extended data out */
	MEM_SDR		= 6,	/* Single data rate SDRAM */
	MEM_RDR		= 7,	/* Registered single data rate SDRAM */
	MEM_DDR		= 8,	/* Double data rate SDRAM */
	MEM_RDDR	= 9,	/* Registered Double data rate SDRAM */
	MEM_RMBS	= 10,	/* Rambus DRAM */
	MEM_DDR2	= 11,	/* DDR2 RAM */
	MEM_FB_DDR2	= 12,	/* fully buffered DDR2 */
	MEM_RDDR2	= 13,	/* Registered DDR2 RAM */
	MEM_XDR		= 14,	/* Rambus XDR */
	MEM_DDR3	= 15,	/* DDR3 RAM */
	MEM_RDDR3	= 16,	/* Registered DDR3 RAM */
};

/* One flag bit per enum mem_type value, at the position of that value */
#define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
#define MEM_FLAG_RESERVED	BIT(MEM_RESERVED)
#define MEM_FLAG_UNKNOWN	BIT(MEM_UNKNOWN)
#define MEM_FLAG_FPM		BIT(MEM_FPM)
#define MEM_FLAG_EDO		BIT(MEM_EDO)
#define MEM_FLAG_BEDO		BIT(MEM_BEDO)
#define MEM_FLAG_SDR		BIT(MEM_SDR)
#define MEM_FLAG_RDR		BIT(MEM_RDR)
#define MEM_FLAG_DDR		BIT(MEM_DDR)
#define MEM_FLAG_RDDR		BIT(MEM_RDDR)
#define MEM_FLAG_RMBS		BIT(MEM_RMBS)
#define MEM_FLAG_DDR2		BIT(MEM_DDR2)
#define MEM_FLAG_FB_DDR2	BIT(MEM_FB_DDR2)
#define MEM_FLAG_RDDR2		BIT(MEM_RDDR2)
#define MEM_FLAG_XDR		BIT(MEM_XDR)
#define MEM_FLAG_DDR3		BIT(MEM_DDR3)
#define MEM_FLAG_RDDR3		BIT(MEM_RDDR3)
/* chipset Error Detection and Correction capabilities and mode */
enum edac_type {
	EDAC_UNKNOWN	= 0,	/* Unknown if ECC is available */
	EDAC_NONE	= 1,	/* Doesn't support ECC */
	EDAC_RESERVED	= 2,	/* Reserved ECC type */
	EDAC_PARITY	= 3,	/* Detects parity errors */
	EDAC_EC		= 4,	/* Error Checking - no correction */
	EDAC_SECDED	= 5,	/* Single bit error correction, Double detection */
	EDAC_S2ECD2ED	= 6,	/* Chipkill x2 devices - do these exist? */
	EDAC_S4ECD4ED	= 7,	/* Chipkill x4 devices */
	EDAC_S8ECD8ED	= 8,	/* Chipkill x8 devices */
	EDAC_S16ECD16ED	= 9,	/* Chipkill x16 devices */
};

/* Flag bits derived from enum edac_type; EDAC_RESERVED has no flag */
#define EDAC_FLAG_UNKNOWN	BIT(EDAC_UNKNOWN)
#define EDAC_FLAG_NONE		BIT(EDAC_NONE)
#define EDAC_FLAG_PARITY	BIT(EDAC_PARITY)
#define EDAC_FLAG_EC		BIT(EDAC_EC)
#define EDAC_FLAG_SECDED	BIT(EDAC_SECDED)
#define EDAC_FLAG_S2ECD2ED	BIT(EDAC_S2ECD2ED)
#define EDAC_FLAG_S4ECD4ED	BIT(EDAC_S4ECD4ED)
#define EDAC_FLAG_S8ECD8ED	BIT(EDAC_S8ECD8ED)
#define EDAC_FLAG_S16ECD16ED	BIT(EDAC_S16ECD16ED)
/* scrubbing capabilities */
enum scrub_type {
	SCRUB_UNKNOWN = 0,	/* Unknown if scrubber is available */
	SCRUB_NONE,		/* No scrubber */
	SCRUB_SW_PROG,		/* SW progressive (sequential) scrubbing */
	SCRUB_SW_SRC,		/* Software scrub only errors */
	SCRUB_SW_PROG_SRC,	/* Progressive software scrub from an error */
	SCRUB_SW_TUNABLE,	/* Software scrub frequency is tunable */
	SCRUB_HW_PROG,		/* HW progressive (sequential) scrubbing */
	SCRUB_HW_SRC,		/* Hardware scrub only errors */
	SCRUB_HW_PROG_SRC,	/* Progressive hardware scrub from an error */
	SCRUB_HW_TUNABLE	/* Hardware scrub frequency is tunable */
};

/*
 * Flag bits derived from enum scrub_type. Every BIT() argument must name an
 * enumerator declared above, otherwise the macro cannot be expanded; the
 * software-tunable flag therefore uses SCRUB_SW_TUNABLE.
 */
#define SCRUB_FLAG_SW_PROG	BIT(SCRUB_SW_PROG)
#define SCRUB_FLAG_SW_SRC	BIT(SCRUB_SW_SRC)
#define SCRUB_FLAG_SW_PROG_SRC	BIT(SCRUB_SW_PROG_SRC)
#define SCRUB_FLAG_SW_TUN	BIT(SCRUB_SW_TUNABLE)
#define SCRUB_FLAG_HW_PROG	BIT(SCRUB_HW_PROG)
#define SCRUB_FLAG_HW_SRC	BIT(SCRUB_HW_SRC)
#define SCRUB_FLAG_HW_PROG_SRC	BIT(SCRUB_HW_PROG_SRC)
#define SCRUB_FLAG_HW_TUN	BIT(SCRUB_HW_TUNABLE)
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
/*
 * EDAC internal operation states.
 * The three OP_RUNNING_* values share the 0x200 base, and
 * OP_RUNNING_POLL_INTR equals OP_RUNNING_POLL | OP_RUNNING_INTERRUPT.
 * NOTE(review): presumably these are the values kept in
 * mem_ctl_info.op_state - confirm against the users of that field.
 */
#define OP_ALLOC 0x100
#define OP_RUNNING_POLL 0x201
#define OP_RUNNING_INTERRUPT 0x202
#define OP_RUNNING_POLL_INTR 0x203
#define OP_OFFLINE 0x300
/*
* There are several things to be aware of that aren't at all obvious:
*
*
* SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
*
* These are some of the many terms that are thrown about that don't always
* mean what people think they mean (Inconceivable!). In the interest of
* creating a common ground for discussion, terms and their definitions
* will be established.
*
* Memory devices: The individual chip on a memory stick. These devices
* commonly output 4 and 8 bits each. Grouping several
* of these in parallel provides 64 bits which is common
* for a memory stick.
*
* Memory Stick: A printed circuit board that aggregates multiple
* memory devices in parallel. This is the atomic
* memory component that is purchasable by Joe consumer
* and loaded into a memory socket.
*
* Socket: A physical connector on the motherboard that accepts
* a single memory stick.
*
* Channel: Set of memory devices on a memory stick that must be
* grouped in parallel with one or more additional
* channels from other memory sticks. This parallel
* grouping of the output from multiple channels are
* necessary for the smallest granularity of memory access.
* Some memory controllers are capable of single channel -
* which means that memory sticks can be loaded
* individually. Other memory controllers are only
* capable of dual channel - which means that memory
* sticks must be loaded as pairs (see "socket set").
*
* Chip-select row: All of the memory devices that are selected together
* for a single, minimum grain of memory access.
* This selects all of the parallel memory devices across
* all of the parallel channels. Common chip-select rows
* for single channel are 64 bits, for dual channel 128
* bits.
*
* Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
* Motherboards commonly drive two chip-select pins to
* a memory stick. A single-ranked stick, will occupy
* only one of those rows. The other will be unused.
*
* Double-Ranked stick: A double-ranked stick has two chip-select rows which
* access different sets of memory devices. The two
* rows cannot be accessed concurrently.
*
* Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
* A double-sided stick has two chip-select rows which
* access different sets of memory devices. The two
* rows cannot be accessed concurrently. "Double-sided"
* is irrespective of the memory devices being mounted
* on both sides of the memory stick.
*
* Socket set: All of the memory sticks that are required for
* a single memory access or all of the memory sticks
* spanned by a chip-select row. A single socket set
* has two chip-select rows and if double-sided sticks
* are used these will occupy those chip-select rows.
*
* Bank: This term is avoided because it is unclear when
* needing to distinguish between chip-select rows and
* socket sets.
*
* Controller pages:
*
* Physical pages:
*
* Virtual pages:
*
*
* STRUCTURE ORGANIZATION AND CHOICES
*
*
*
* PS - I enjoyed writing all that about as much as you enjoyed reading it.
*/
/*
 * Per-channel bookkeeping for one chip-select row: the channel's index, its
 * correctable-error counter, a label string and a back pointer to the owning
 * csrow_info.
 */
struct channel_info {
int chan_idx; /* channel index */
u32 ce_count; /* Correctable Errors for this CHANNEL */
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
struct csrow_info *csrow; /* the parent */
};
/*
 * Description of one chip-select row: its page range, device and memory
 * type, EDAC mode, error counters, sysfs kobject, a back pointer to the
 * owning mem_ctl_info and the array of channels that make up the row.
 */
struct csrow_info {
unsigned long first_page; /* first page number in dimm */
unsigned long last_page; /* last page number in dimm */
unsigned long page_mask; /* used for interleaving -
* 0UL for non intlv
*/
u32 nr_pages; /* number of pages in csrow */
u32 grain; /* granularity of reported error in bytes */
int csrow_idx; /* the chip-select row */
enum dev_type dtype; /* memory device type */
u32 ue_count; /* Uncorrectable Errors for this csrow */
u32 ce_count; /* Correctable Errors for this csrow */
enum mem_type mtype; /* memory csrow type */
enum edac_type edac_mode; /* EDAC mode for this csrow */
struct mem_ctl_info *mci; /* the parent */
struct kobject kobj; /* sysfs kobject for this csrow */
/* channel information for this csrow */
u32 nr_channels; /* number of entries in 'channels' (presumably) */
struct channel_info *channels;
};
/*
 * Named group of driver-supplied sysfs attributes; referenced from
 * mcidev_sysfs_attribute.grp and mcidev_sysfs_group_kobj.grp.
 */
struct mcidev_sysfs_group {
const char *name; /* group name */
const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
};
/*
 * One instance of an attribute group under a memory controller: ties the
 * group's kobject to its description table and to the owning mem_ctl_info.
 * Instances are chained through 'list' (mem_ctl_info has a matching
 * grp_kobj_list head).
 */
struct mcidev_sysfs_group_kobj {
struct list_head list; /* list for all instances within a mc */
struct kobject kobj; /* kobj for the group */
const struct mcidev_sysfs_group *grp; /* group description table */
struct mem_ctl_info *mci; /* the parent */
};
/* mcidev_sysfs_attribute structure
* used for driver sysfs attributes and in mem_ctl_info
* sysfs top level entries
*
* An entry describes either a single attribute ('attr' plus the show/store
* callbacks) or a whole group ('grp'); the callbacks receive the owning
* mem_ctl_info and the sysfs buffer.
*/
struct mcidev_sysfs_attribute {
/* It should use either attr or grp */
struct attribute attr;
const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
/* Ops for show/store values at the attribute - not used on group */
ssize_t (*show)(struct mem_ctl_info *,char *);
ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
};
/* MEMORY controller information structure
*
* One instance describes a memory controller: capability masks, the
* callbacks supplied by the low-level driver, the csrow array,
* identification strings, error counters, and the sysfs and delayed-work
* bookkeeping.
*/
struct mem_ctl_info {
struct list_head link; /* for global list of mem_ctl_info structs */
struct module *owner; /* Module owner of this control struct */
unsigned long mtype_cap; /* memory types supported by mc */
unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
unsigned long edac_cap; /* configuration capabilities - this is
* closely related to edac_ctl_cap. The
* difference is that the controller may be
* capable of s4ecd4ed which would be listed
* in edac_ctl_cap, but if channels aren't
* capable of s4ecd4ed then the edac_cap would
* not have that capability.
*/
unsigned long scrub_cap; /* chipset scrub capabilities */
enum scrub_type scrub_mode; /* current scrub mode */
/* Translates sdram memory scrub rate given in bytes/sec to the
internal representation and configures whatever else needs
to be configured.
*/
int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
/* Get the current sdram memory scrub rate from the internal
representation and converts it to the closest matching
bandwidth in bytes/sec.
*/
int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
/* pointer to edac checking routine */
void (*edac_check) (struct mem_ctl_info * mci);
/*
* Remaps memory pages: controller pages to physical pages.
* For most MC's, this will be NULL.
*/
/* FIXME - why not send the phys page to begin with? */
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
unsigned long page);
int mc_idx; /* NOTE(review): presumably this controller's index - confirm */
int nr_csrows; /* NOTE(review): presumably the length of 'csrows' - confirm */
struct csrow_info *csrows;
/*
* FIXME - what about controllers on other busses? - IDs must be
* unique. dev pointer should be sufficiently unique, but
* BUS:SLOT.FUNC numbers may not be unique.
*/
struct device *dev;
const char *mod_name;
const char *mod_ver;
const char *ctl_name;
const char *dev_name;
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
void *pvt_info; /* opaque pointer to the low-level driver's private data */
u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
u32 ce_noinfo_count; /* Correctable Errors w/o info */
u32 ue_count; /* Total Uncorrectable Errors for this MC */
u32 ce_count; /* Total Correctable Errors for this MC */
unsigned long start_time; /* mci load start time (in jiffies) */
struct completion complete;
/* edac sysfs device control */
struct kobject edac_mci_kobj;
/* list for all grp instances within a mc */
struct list_head grp_kobj_list;
/* Additional top controller level attributes, but specified
* by the low level driver.
*
* Set by the low level driver to provide attributes at the
* controller level, same level as 'ue_count' and 'ce_count' above.
* An array of structures, NULL terminated
*
* If attributes are desired, then set to array of attributes
* If no attributes are desired, leave NULL
*/
const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
/* work struct for this MC */
struct delayed_work work;
/* the internal state of this controller instance */
int op_state;
};
#endif #endif

Просмотреть файл

@ -1,31 +0,0 @@
/* Provides edac interface to mcelog events
*
* This file may be distributed under the terms of the
* GNU General Public License version 2.
*
* Copyright (c) 2009 by:
* Mauro Carvalho Chehab <mchehab@redhat.com>
*
* Red Hat Inc. http://www.redhat.com
*/
#if defined(CONFIG_EDAC_MCE) || \
(defined(CONFIG_EDAC_MCE_MODULE) && defined(MODULE))
#include <asm/mce.h>
#include <linux/list.h>
/*
 * Registration record passed to edac_mce_register()/edac_mce_unregister():
 * a list node, an opaque cookie and the callback that receives the cookie
 * together with the struct mce being reported.
 */
struct edac_mce {
struct list_head list;
void *priv; /* handed back as the first argument of check_error() */
int (*check_error)(void *priv, struct mce *mce);
};
int edac_mce_register(struct edac_mce *edac_mce);
void edac_mce_unregister(struct edac_mce *edac_mce);
int edac_mce_parse(struct mce *mce);
#else
#define edac_mce_parse(mce) (0)
#endif