path: root/include
diff options
authorMauro Carvalho Chehab <mchehab@redhat.com>2012-04-18 15:20:50 -0300
committerMauro Carvalho Chehab <mchehab@redhat.com>2012-05-28 19:10:59 -0300
commit4275be63559719c3149b19751029f1b0f1b26775 (patch)
treed215a184f4278d7bc9095f18eb4c748149e241f3 /include
parent982216a4290543fe73ae4f0a156f3d7906bd9b73 (diff)
edac: Change internal representation to work with layers
Change the EDAC internal representation to work with non-csrow based memory controllers. There are lots of those memory controllers nowadays, and more are coming. So, the EDAC internal representation needs to be changed, in order to work with those memory controllers, while preserving backward compatibility with the old ones. The edac core was written with the idea that memory controllers are able to directly access csrows. This is not true for FB-DIMM and RAMBUS memory controllers. Also, some recent advanced memory controllers don't present a per-csrows view. Instead, they view memories as DIMMs, instead of ranks. So, change the allocation and error report routines to allow them to work with all types of architectures. This will allow the removal of several hacks with FB-DIMM and RAMBUS memory controllers. Also, several tests were done on different platforms using different x86 drivers. TODO: a multi-rank DIMMs are currently represented by multiple DIMM entries in struct dimm_info. That means that changing a label for one rank won't change the same label for the other ranks at the same DIMM. This bug is present since the beginning of the EDAC, so it is not a big deal. However, on several drivers, it is possible to fix this issue, but it should be a per-driver fix, as the csrow => DIMM arrangement may not be equal for all. So, don't try to fix it here yet. I tried to make this patch as short as possible, preceding it with several other patches that simplified the logic here. Yet, as the internal API changes, all drivers need changes. The changes are generally bigger in the drivers for FB-DIMMs. Cc: Aristeu Rozanski <arozansk@redhat.com> Cc: Doug Thompson <norsk5@yahoo.com> Cc: Borislav Petkov <borislav.petkov@amd.com> Cc: Mark Gross <mark.gross@intel.com> Cc: Jason Uhlenkott <juhlenko@akamai.com> Cc: Tim Small <tim@buttersideup.com> Cc: Ranganathan Desikan <ravi@jetztechnologies.com> Cc: "Arvind R." <arvino55@gmail.com> Cc: Olof Johansson <olof@lixom.net> Cc: Egor Martovetsky <egor@pasemi.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Michal Marek <mmarek@suse.cz> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Joe Perches <joe@perches.com> Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Hitoshi Mitake <h.mitake@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com> Cc: Shaohui Xie <Shaohui.Xie@freescale.com> Cc: Josh Boyer <jwboyer@gmail.com> Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'include')
1 files changed, 25 insertions, 13 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 9e628434e164..d68b01cad068 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -416,18 +416,20 @@ struct edac_mc_layer {
/* FIXME: add the proper per-location error counts */
struct dimm_info {
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
- unsigned memory_controller;
- unsigned csrow;
- unsigned csrow_channel;
+ /* Memory location data */
+ unsigned location[EDAC_MAX_LAYERS];
+ struct mem_ctl_info *mci; /* the parent */
u32 grain; /* granularity of reported error in bytes */
enum dev_type dtype; /* memory device type */
enum mem_type mtype; /* memory dimm type */
enum edac_type edac_mode; /* EDAC mode for this dimm */
- u32 nr_pages; /* number of pages in csrow */
+ u32 nr_pages; /* number of pages on this dimm */
- u32 ce_count; /* Correctable Errors for this dimm */
+ unsigned csrow, cschannel; /* Points to the old API data */
@@ -447,9 +449,10 @@ struct dimm_info {
struct rank_info {
int chan_idx;
- u32 ce_count;
struct csrow_info *csrow;
struct dimm_info *dimm;
+ u32 ce_count; /* Correctable Errors for this csrow */
struct csrow_info {
@@ -545,13 +548,18 @@ struct mem_ctl_info {
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
unsigned long page);
int mc_idx;
- int nr_csrows;
struct csrow_info *csrows;
+ unsigned nr_csrows, num_cschannel;
+ /* Memory Controller hierarchy */
+ unsigned n_layers;
+ struct edac_mc_layer *layers;
+ bool mem_is_per_rank;
* DIMM info. Will eventually remove the entire csrows_info some day
- unsigned nr_dimms;
+ unsigned tot_dimms;
struct dimm_info *dimms;
@@ -566,12 +574,16 @@ struct mem_ctl_info {
const char *dev_name;
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
unsigned long start_time; /* mci load start time (in jiffies) */
+ /*
+ * drivers shouldn't access those fields directly, as the core
+ * already handles that.
+ */
+ u32 ce_noinfo_count, ue_noinfo_count;
+ u32 ue_count, ce_count;
+ u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
struct completion complete;
/* edac sysfs device control */
@@ -584,7 +596,7 @@ struct mem_ctl_info {
* by the low level driver.
* Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
+ * controller level.
* An array of structures, NULL terminated
* If attributes are desired, then set to array of attributes