summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 14:24:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 14:24:40 -0700
commitf0f3680e50352c57b6cfc5b0d44d63bb0aa20f80 (patch)
tree2005ec90f9d90f25ceeba147dfe09db8c8036fa6
parent61e5191c9d96268746bd57ed55d035678a1a2cf9 (diff)
parenta4b4be3fd7a76021f67380b03d8bccebf067db72 (diff)
downloadblackbird-op-linux-f0f3680e50352c57b6cfc5b0d44d63bb0aa20f80.tar.gz
blackbird-op-linux-f0f3680e50352c57b6cfc5b0d44d63bb0aa20f80.zip
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC fixes from Mauro Carvalho Chehab: "A series of EDAC driver fixes. It also has one core fix at the documentation, and a rename patch, fixing the name of the struct that contains the rank information." * 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: edac: rename channel_info to rank_info i5400_edac: Avoid calling pci_put_device() twice edac: i5100 ack error detection register after each read edac: i5100 fix erroneous define for M1Err edac: sb_edac: Fix a wrong value setting for the previous value edac: sb_edac: Fix a INTERLEAVE_MODE() misuse edac: sb_edac: Let the driver depend on PCI_MMCONFIG edac: Improve the comments to better describe the memory concepts edac/ppc4xx_edac: Fix compilation Fix sb_edac compilation with 32 bits kernels
-rw-r--r--drivers/edac/Kconfig2
-rw-r--r--drivers/edac/edac_mc.c6
-rw-r--r--drivers/edac/i5100_edac.c13
-rw-r--r--drivers/edac/i5400_edac.c54
-rw-r--r--drivers/edac/ppc4xx_edac.c4
-rw-r--r--drivers/edac/sb_edac.c52
-rw-r--r--include/linux/edac.h179
7 files changed, 208 insertions, 102 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5948a2194f50..fdffa1beca17 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -215,7 +215,7 @@ config EDAC_I7300
config EDAC_SBRIDGE
tristate "Intel Sandy-Bridge Integrated MC"
depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
- depends on EXPERIMENTAL
+ depends on PCI_MMCONFIG && EXPERIMENTAL
help
Support for error detection and correction the Intel
Sandy Bridge Integrated Memory Controller.
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index da09cd74bc5b..feef7733fae7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -39,7 +39,7 @@ static LIST_HEAD(mc_devices);
#ifdef CONFIG_EDAC_DEBUG
-static void edac_mc_dump_channel(struct channel_info *chan)
+static void edac_mc_dump_channel(struct rank_info *chan)
{
debugf4("\tchannel = %p\n", chan);
debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
@@ -156,7 +156,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
{
struct mem_ctl_info *mci;
struct csrow_info *csi, *csrow;
- struct channel_info *chi, *chp, *chan;
+ struct rank_info *chi, *chp, *chan;
void *pvt;
unsigned size;
int row, chn;
@@ -181,7 +181,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
* rather than an imaginary chunk of memory located at address 0.
*/
csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
- chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
+ chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
/* setup index and various internal pointers */
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 2e23547b2f24..d500749464ea 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -49,7 +49,7 @@
#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
-#define I5100_FERR_NF_MEM_M1ERR_MASK 1
+#define I5100_FERR_NF_MEM_M1ERR_MASK (1 << 1)
#define I5100_FERR_NF_MEM_ANY_MASK \
(I5100_FERR_NF_MEM_M16ERR_MASK | \
I5100_FERR_NF_MEM_M15ERR_MASK | \
@@ -535,23 +535,20 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
static void i5100_check_error(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
- u32 dw;
-
+ u32 dw, dw2;
pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
if (i5100_ferr_nf_mem_any(dw)) {
- u32 dw2;
pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
- if (dw2)
- pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
- dw2);
- pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
i5100_ferr_nf_mem_any(dw),
i5100_nerr_nf_mem_any(dw2));
+
+ pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, dw2);
}
+ pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
}
/* The i5100 chipset will scrub the entire memory once, then
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 67ec9626a330..1869a1018fb5 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -735,7 +735,7 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
/* Attempt to 'get' the MCH register we want */
pdev = NULL;
- while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
+ while (1) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
if (!pdev) {
@@ -743,23 +743,42 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
i5400_printk(KERN_ERR,
"'system address,Process Bus' "
"device not found:"
- "vendor 0x%x device 0x%x ERR funcs "
+ "vendor 0x%x device 0x%x ERR func 1 "
"(broken BIOS?)\n",
PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_ERR);
- goto error;
+ return -ENODEV;
}
- /* Store device 16 funcs 1 and 2 */
- switch (PCI_FUNC(pdev->devfn)) {
- case 1:
- pvt->branchmap_werrors = pdev;
- break;
- case 2:
- pvt->fsb_error_regs = pdev;
+ /* Store device 16 func 1 */
+ if (PCI_FUNC(pdev->devfn) == 1)
break;
+ }
+ pvt->branchmap_werrors = pdev;
+
+ pdev = NULL;
+ while (1) {
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
+ if (!pdev) {
+ /* End of list, leave */
+ i5400_printk(KERN_ERR,
+ "'system address,Process Bus' "
+ "device not found:"
+ "vendor 0x%x device 0x%x ERR func 2 "
+ "(broken BIOS?)\n",
+ PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5400_ERR);
+
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
}
+
+ /* Store device 16 func 2 */
+ if (PCI_FUNC(pdev->devfn) == 2)
+ break;
}
+ pvt->fsb_error_regs = pdev;
debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
pci_name(pvt->system_address),
@@ -778,7 +797,10 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
"MC: 'BRANCH 0' device not found:"
"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
- goto error;
+
+ pci_dev_put(pvt->fsb_error_regs);
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
}
/* If this device claims to have more than 2 channels then
@@ -796,14 +818,14 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
"(broken BIOS?)\n",
PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_FBD1);
- goto error;
+
+ pci_dev_put(pvt->branch_0);
+ pci_dev_put(pvt->fsb_error_regs);
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
}
return 0;
-
-error:
- i5400_put_devices(mci);
- return -ENODEV;
}
/*
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index fc757069c6af..d427c69bb8b1 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -184,7 +184,7 @@ struct ppc4xx_ecc_status {
/* Function Prototypes */
-static int ppc4xx_edac_probe(struct platform_device *device)
+static int ppc4xx_edac_probe(struct platform_device *device);
static int ppc4xx_edac_remove(struct platform_device *device);
/* Global Variables */
@@ -1068,7 +1068,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
mci->mod_name = PPC4XX_EDAC_MODULE_NAME;
mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION;
- mci->ctl_name = match->compatible,
+ mci->ctl_name = ppc4xx_edac_match->compatible,
mci->dev_name = np->full_name;
/* Initialize callbacks */
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 3a605f777712..a203536d90dd 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -20,6 +20,7 @@
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/bitmap.h>
+#include <linux/math64.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -670,6 +671,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
u32 reg;
u64 limit, prv = 0;
u64 tmp_mb;
+ u32 mb, kb;
u32 rir_way;
/*
@@ -682,8 +684,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
pvt->tolm = GET_TOLM(reg);
tmp_mb = (1 + pvt->tolm) >> 20;
- debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n",
- tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm);
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("TOLM: %u.%03u GB (0x%016Lx)\n",
+ mb, kb, (u64)pvt->tolm);
/* Address range is already 45:25 */
pci_read_config_dword(pvt->pci_sad1, TOHM,
@@ -691,8 +694,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
pvt->tohm = GET_TOHM(reg);
tmp_mb = (1 + pvt->tohm) >> 20;
- debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)",
- tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm);
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("TOHM: %u.%03u GB (0x%016Lx)",
+ mb, kb, (u64)pvt->tohm);
/*
* Step 2) Get SAD range and SAD Interleave list
@@ -714,10 +718,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
break;
tmp_mb = (limit + 1) >> 20;
- debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n",
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n",
n_sads,
get_dram_attr(reg),
- tmp_mb / 1000, tmp_mb % 1000,
+ mb, kb,
((u64)tmp_mb) << 20L,
INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
reg);
@@ -747,8 +752,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
break;
tmp_mb = (limit + 1) >> 20;
- debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
- n_tads, tmp_mb / 1000, tmp_mb % 1000,
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
+ n_tads, mb, kb,
((u64)tmp_mb) << 20L,
(u32)TAD_SOCK(reg),
(u32)TAD_CH(reg),
@@ -757,7 +763,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
(u32)TAD_TGT2(reg),
(u32)TAD_TGT3(reg),
reg);
- prv = tmp_mb;
+ prv = limit;
}
/*
@@ -771,9 +777,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tad_ch_nilv_offset[j],
&reg);
tmp_mb = TAD_OFFSET(reg) >> 20;
- debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n",
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
i, j,
- tmp_mb / 1000, tmp_mb % 1000,
+ mb, kb,
((u64)tmp_mb) << 20L,
reg);
}
@@ -795,9 +802,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = RIR_LIMIT(reg) >> 20;
rir_way = 1 << RIR_WAY(reg);
- debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n",
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
i, j,
- tmp_mb / 1000, tmp_mb % 1000,
+ mb, kb,
((u64)tmp_mb) << 20L,
rir_way,
reg);
@@ -808,9 +816,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
&reg);
tmp_mb = RIR_OFFSET(reg) << 6;
- debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
i, j, k,
- tmp_mb / 1000, tmp_mb % 1000,
+ mb, kb,
((u64)tmp_mb) << 20L,
(u32)RIR_RNK_TGT(reg),
reg);
@@ -848,6 +857,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
u8 ch_way,sck_way;
u32 tad_offset;
u32 rir_way;
+ u32 mb, kb;
u64 ch_addr, offset, limit, prv = 0;
@@ -858,7 +868,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
* range (e. g. VGA addresses). It is unlikely, however, that the
* memory controller would generate an error on that range.
*/
- if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) {
+ if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
@@ -913,7 +923,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
addr,
limit,
sad_way + 7,
- INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]");
+ interleave_mode ? "" : "XOR[18:16]");
if (interleave_mode)
idx = ((addr >> 6) ^ (addr >> 16)) & 7;
else
@@ -1053,7 +1063,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
ch_addr = addr & 0x7f;
/* Remove socket wayness and remove 6 bits */
addr >>= 6;
- addr /= sck_xch;
+ addr = div_u64(addr, sck_xch);
#if 0
/* Divide by channel way */
addr = addr / ch_way;
@@ -1073,10 +1083,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
continue;
limit = RIR_LIMIT(reg);
-
- debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n",
+ mb = div_u64_rem(limit >> 20, 1000, &kb);
+ debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
n_rir,
- (limit >> 20) / 1000, (limit >> 20) % 1000,
+ mb, kb,
limit,
1 << RIR_WAY(reg));
if (ch_addr <= limit)
diff --git a/include/linux/edac.h b/include/linux/edac.h
index ba317e2930a1..c621d762bb2c 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -70,25 +70,64 @@ enum dev_type {
#define DEV_FLAG_X32 BIT(DEV_X32)
#define DEV_FLAG_X64 BIT(DEV_X64)
-/* memory types */
+/**
+ * enum mem_type - memory types. For a more detailed reference, please see
+ * http://en.wikipedia.org/wiki/DRAM
+ *
+ * @MEM_EMPTY Empty csrow
+ * @MEM_RESERVED: Reserved csrow type
+ * @MEM_UNKNOWN: Unknown csrow type
+ * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995.
+ * @MEM_EDO: EDO - Extended data out, used on systems up to 1998.
+ * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant.
+ * @MEM_SDR: SDR - Single data rate SDRAM
+ * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory
+ * They use 3 pins for chip select: Pins 0 and 2 are
+ * for rank 0; pins 1 and 3 are for rank 1, if the memory
+ * is dual-rank.
+ * @MEM_RDR: Registered SDR SDRAM
+ * @MEM_DDR: Double data rate SDRAM
+ * http://en.wikipedia.org/wiki/DDR_SDRAM
+ * @MEM_RDDR: Registered Double data rate SDRAM
+ * This is a variant of the DDR memories.
+ * A registered memory has a buffer inside it, hiding
+ * part of the memory details to the memory controller.
+ * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers.
+ * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F.
+ * Those memories are labed as "PC2-" instead of "PC" to
+ * differenciate from DDR.
+ * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205
+ * and JESD206.
+ * Those memories are accessed per DIMM slot, and not by
+ * a chip select signal.
+ * @MEM_RDDR2: Registered DDR2 RAM
+ * This is a variant of the DDR2 memories.
+ * @MEM_XDR: Rambus XDR
+ * It is an evolution of the original RAMBUS memories,
+ * created to compete with DDR2. Weren't used on any
+ * x86 arch, but cell_edac PPC memory controller uses it.
+ * @MEM_DDR3: DDR3 RAM
+ * @MEM_RDDR3: Registered DDR3 RAM
+ * This is a variant of the DDR3 memories.
+ */
enum mem_type {
- MEM_EMPTY = 0, /* Empty csrow */
- MEM_RESERVED, /* Reserved csrow type */
- MEM_UNKNOWN, /* Unknown csrow type */
- MEM_FPM, /* Fast page mode */
- MEM_EDO, /* Extended data out */
- MEM_BEDO, /* Burst Extended data out */
- MEM_SDR, /* Single data rate SDRAM */
- MEM_RDR, /* Registered single data rate SDRAM */
- MEM_DDR, /* Double data rate SDRAM */
- MEM_RDDR, /* Registered Double data rate SDRAM */
- MEM_RMBS, /* Rambus DRAM */
- MEM_DDR2, /* DDR2 RAM */
- MEM_FB_DDR2, /* fully buffered DDR2 */
- MEM_RDDR2, /* Registered DDR2 RAM */
- MEM_XDR, /* Rambus XDR */
- MEM_DDR3, /* DDR3 RAM */
- MEM_RDDR3, /* Registered DDR3 RAM */
+ MEM_EMPTY = 0,
+ MEM_RESERVED,
+ MEM_UNKNOWN,
+ MEM_FPM,
+ MEM_EDO,
+ MEM_BEDO,
+ MEM_SDR,
+ MEM_RDR,
+ MEM_DDR,
+ MEM_RDDR,
+ MEM_RMBS,
+ MEM_DDR2,
+ MEM_FB_DDR2,
+ MEM_RDDR2,
+ MEM_XDR,
+ MEM_DDR3,
+ MEM_RDDR3,
};
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
@@ -166,8 +205,9 @@ enum scrub_type {
#define OP_OFFLINE 0x300
/*
- * There are several things to be aware of that aren't at all obvious:
+ * Concepts used at the EDAC subsystem
*
+ * There are several things to be aware of that aren't at all obvious:
*
* SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
*
@@ -176,36 +216,61 @@ enum scrub_type {
* creating a common ground for discussion, terms and their definitions
* will be established.
*
- * Memory devices: The individual chip on a memory stick. These devices
- * commonly output 4 and 8 bits each. Grouping several
- * of these in parallel provides 64 bits which is common
- * for a memory stick.
+ * Memory devices: The individual DRAM chips on a memory stick. These
+ * devices commonly output 4 and 8 bits each (x4, x8).
+ * Grouping several of these in parallel provides the
+ * number of bits that the memory controller expects:
+ * typically 72 bits, in order to provide 64 bits +
+ * 8 bits of ECC data.
*
* Memory Stick: A printed circuit board that aggregates multiple
- * memory devices in parallel. This is the atomic
- * memory component that is purchaseable by Joe consumer
- * and loaded into a memory socket.
+ * memory devices in parallel. In general, this is the
+ * Field Replaceable Unit (FRU) which gets replaced, in
+ * the case of excessive errors. Most often it is also
+ * called DIMM (Dual Inline Memory Module).
+ *
+ * Memory Socket: A physical connector on the motherboard that accepts
+ * a single memory stick. Also called as "slot" on several
+ * datasheets.
*
- * Socket: A physical connector on the motherboard that accepts
- * a single memory stick.
+ * Channel: A memory controller channel, responsible to communicate
+ * with a group of DIMMs. Each channel has its own
+ * independent control (command) and data bus, and can
+ * be used independently or grouped with other channels.
*
- * Channel: Set of memory devices on a memory stick that must be
- * grouped in parallel with one or more additional
- * channels from other memory sticks. This parallel
- * grouping of the output from multiple channels are
- * necessary for the smallest granularity of memory access.
- * Some memory controllers are capable of single channel -
- * which means that memory sticks can be loaded
- * individually. Other memory controllers are only
- * capable of dual channel - which means that memory
- * sticks must be loaded as pairs (see "socket set").
+ * Branch: It is typically the highest hierarchy on a
+ * Fully-Buffered DIMM memory controller.
+ * Typically, it contains two channels.
+ * Two channels at the same branch can be used in single
+ * mode or in lockstep mode.
+ * When lockstep is enabled, the cacheline is doubled,
+ * but it generally brings some performance penalty.
+ * Also, it is generally not possible to point to just one
+ * memory stick when an error occurs, as the error
+ * correction code is calculated using two DIMMs instead
+ * of one. Due to that, it is capable of correcting more
+ * errors than on single mode.
*
- * Chip-select row: All of the memory devices that are selected together.
- * for a single, minimum grain of memory access.
- * This selects all of the parallel memory devices across
- * all of the parallel channels. Common chip-select rows
- * for single channel are 64 bits, for dual channel 128
- * bits.
+ * Single-channel: The data accessed by the memory controller is contained
+ * into one dimm only. E. g. if the data is 64 bits-wide,
+ * the data flows to the CPU using one 64 bits parallel
+ * access.
+ * Typically used with SDR, DDR, DDR2 and DDR3 memories.
+ * FB-DIMM and RAMBUS use a different concept for channel,
+ * so this concept doesn't apply there.
+ *
+ * Double-channel: The data size accessed by the memory controller is
+ * interlaced into two dimms, accessed at the same time.
+ * E. g. if the DIMM is 64 bits-wide (72 bits with ECC),
+ * the data flows to the CPU using a 128 bits parallel
+ * access.
+ *
+ * Chip-select row: This is the name of the DRAM signal used to select the
+ * DRAM ranks to be accessed. Common chip-select rows for
+ * single channel are 64 bits, for dual channel 128 bits.
+ * It may not be visible by the memory controller, as some
+ * DIMM types have a memory buffer that can hide direct
+ * access to it from the Memory Controller.
*
* Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
* Motherboards commonly drive two chip-select pins to
@@ -218,8 +283,8 @@ enum scrub_type {
*
* Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
* A double-sided stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently. "Double-sided"
+ * access different sets of memory devices. The two
+ * rows cannot be accessed concurrently. "Double-sided"
* is irrespective of the memory devices being mounted
* on both sides of the memory stick.
*
@@ -247,10 +312,22 @@ enum scrub_type {
* PS - I enjoyed writing all that about as much as you enjoyed reading it.
*/
-struct channel_info {
- int chan_idx; /* channel index */
- u32 ce_count; /* Correctable Errors for this CHANNEL */
- char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
+/**
+ * struct rank_info - contains the information for one DIMM rank
+ *
+ * @chan_idx: channel number where the rank is (typically, 0 or 1)
+ * @ce_count: number of correctable errors for this rank
+ * @label: DIMM label. Different ranks for the same DIMM should be
+ * filled, on userspace, with the same label.
+ * FIXME: The core currently won't enforce it.
+ * @csrow: A pointer to the chip select row structure (the parent
+ * structure). The location of the rank is given by
+ * the (csrow->csrow_idx, chan_idx) vector.
+ */
+struct rank_info {
+ int chan_idx;
+ u32 ce_count;
+ char label[EDAC_MC_LABEL_LEN + 1];
struct csrow_info *csrow; /* the parent */
};
@@ -274,7 +351,7 @@ struct csrow_info {
/* channel information for this csrow */
u32 nr_channels;
- struct channel_info *channels;
+ struct rank_info *channels;
};
struct mcidev_sysfs_group {
OpenPOWER on IntegriCloud