diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 14:24:40 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 14:24:40 -0700 |
commit | f0f3680e50352c57b6cfc5b0d44d63bb0aa20f80 (patch) | |
tree | 2005ec90f9d90f25ceeba147dfe09db8c8036fa6 | |
parent | 61e5191c9d96268746bd57ed55d035678a1a2cf9 (diff) | |
parent | a4b4be3fd7a76021f67380b03d8bccebf067db72 (diff) | |
download | blackbird-op-linux-f0f3680e50352c57b6cfc5b0d44d63bb0aa20f80.tar.gz blackbird-op-linux-f0f3680e50352c57b6cfc5b0d44d63bb0aa20f80.zip |
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC fixes from Mauro Carvalho Chehab:
"A series of EDAC driver fixes. It also has one core fix at the
documentation, and a rename patch, fixing the name of the struct that
contains the rank information."
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac:
edac: rename channel_info to rank_info
i5400_edac: Avoid calling pci_put_device() twice
edac: i5100 ack error detection register after each read
edac: i5100 fix erroneous define for M1Err
edac: sb_edac: Fix a wrong value setting for the previous value
edac: sb_edac: Fix a INTERLEAVE_MODE() misuse
edac: sb_edac: Let the driver depend on PCI_MMCONFIG
edac: Improve the comments to better describe the memory concepts
edac/ppc4xx_edac: Fix compilation
Fix sb_edac compilation with 32 bits kernels
-rw-r--r-- | drivers/edac/Kconfig | 2 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 6 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 13 | ||||
-rw-r--r-- | drivers/edac/i5400_edac.c | 54 | ||||
-rw-r--r-- | drivers/edac/ppc4xx_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 52 | ||||
-rw-r--r-- | include/linux/edac.h | 179 |
7 files changed, 208 insertions, 102 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 5948a2194f50..fdffa1beca17 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -215,7 +215,7 @@ config EDAC_I7300 config EDAC_SBRIDGE tristate "Intel Sandy-Bridge Integrated MC" depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on EXPERIMENTAL + depends on PCI_MMCONFIG && EXPERIMENTAL help Support for error detection and correction the Intel Sandy Bridge Integrated Memory Controller. diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index da09cd74bc5b..feef7733fae7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -39,7 +39,7 @@ static LIST_HEAD(mc_devices); #ifdef CONFIG_EDAC_DEBUG -static void edac_mc_dump_channel(struct channel_info *chan) +static void edac_mc_dump_channel(struct rank_info *chan) { debugf4("\tchannel = %p\n", chan); debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); @@ -156,7 +156,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, { struct mem_ctl_info *mci; struct csrow_info *csi, *csrow; - struct channel_info *chi, *chp, *chan; + struct rank_info *chi, *chp, *chan; void *pvt; unsigned size; int row, chn; @@ -181,7 +181,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, * rather than an imaginary chunk of memory located at address 0. */ csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); - chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); + chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi)); pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; /* setup index and various internal pointers */ diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 2e23547b2f24..d500749464ea 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -49,7 +49,7 @@ #define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6) #define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5) #define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4) -#define I5100_FERR_NF_MEM_M1ERR_MASK 1 +#define I5100_FERR_NF_MEM_M1ERR_MASK (1 << 1) #define I5100_FERR_NF_MEM_ANY_MASK \ (I5100_FERR_NF_MEM_M16ERR_MASK | \ I5100_FERR_NF_MEM_M15ERR_MASK | \ @@ -535,23 +535,20 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan, static void i5100_check_error(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; - u32 dw; - + u32 dw, dw2; pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw); if (i5100_ferr_nf_mem_any(dw)) { - u32 dw2; pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2); - if (dw2) - pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, - dw2); - pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw), i5100_ferr_nf_mem_any(dw), i5100_nerr_nf_mem_any(dw2)); + + pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, dw2); } + pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); } /* The i5100 chipset will scrub the entire memory once, then diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 67ec9626a330..1869a1018fb5 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -735,7 +735,7 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + while (1) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR, pdev); if (!pdev) { @@ -743,23 +743,42 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) i5400_printk(KERN_ERR, "'system address,Process Bus' " "device not found:" - "vendor 0x%x device 0x%x ERR funcs " + "vendor 0x%x device 0x%x ERR func 1 " "(broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR); - goto error; + return -ENODEV; } - /* Store device 16 funcs 1 and 2 */ - switch (PCI_FUNC(pdev->devfn)) { - case 1: - pvt->branchmap_werrors = pdev; - break; - case 2: - pvt->fsb_error_regs = pdev; + /* Store device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 1) break; + } + pvt->branchmap_werrors = pdev; + + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i5400_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR func 2 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR); + + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; } + + /* Store device 16 func 2 */ + if (PCI_FUNC(pdev->devfn) == 2) + break; } + pvt->fsb_error_regs = pdev; debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", pci_name(pvt->system_address), @@ -778,7 +797,10 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) "MC: 'BRANCH 0' device not found:" "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); - goto error; + + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; } /* If this device claims to have more than 2 channels then @@ -796,14 +818,14 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) "(broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD1); - goto error; + + pci_dev_put(pvt->branch_0); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; } return 0; - -error: - i5400_put_devices(mci); - return -ENODEV; } /* diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index fc757069c6af..d427c69bb8b1 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -184,7 +184,7 @@ struct ppc4xx_ecc_status { /* Function Prototypes */ -static int ppc4xx_edac_probe(struct platform_device *device) +static int ppc4xx_edac_probe(struct platform_device *device); static int ppc4xx_edac_remove(struct platform_device *device); /* Global Variables */ @@ -1068,7 +1068,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, mci->mod_name = PPC4XX_EDAC_MODULE_NAME; mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION; - mci->ctl_name = match->compatible, + mci->ctl_name = ppc4xx_edac_match->compatible, mci->dev_name = np->full_name; /* Initialize callbacks */ diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 3a605f777712..a203536d90dd 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -20,6 +20,7 @@ #include <linux/mmzone.h> #include <linux/smp.h> #include <linux/bitmap.h> +#include <linux/math64.h> #include <asm/processor.h> #include <asm/mce.h> @@ -670,6 +671,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) u32 reg; u64 limit, prv = 0; u64 tmp_mb; + u32 mb, kb; u32 rir_way; /* @@ -682,8 +684,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pvt->tolm = GET_TOLM(reg); tmp_mb = (1 + pvt->tolm) >> 20; - debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n", - tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm); + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("TOLM: %u.%03u GB (0x%016Lx)\n", + mb, kb, (u64)pvt->tolm); /* Address range is already 45:25 */ pci_read_config_dword(pvt->pci_sad1, TOHM, @@ -691,8 +694,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pvt->tohm = GET_TOHM(reg); tmp_mb = (1 + pvt->tohm) >> 20; - debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)", - tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm); + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("TOHM: %u.%03u GB (0x%016Lx)", + mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -714,10 +718,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n", + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n", n_sads, get_dram_attr(reg), - tmp_mb / 1000, tmp_mb % 1000, + mb, kb, ((u64)tmp_mb) << 20L, INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]", reg); @@ -747,8 +752,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", - n_tads, tmp_mb / 1000, tmp_mb % 1000, + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", + n_tads, mb, kb, ((u64)tmp_mb) << 20L, (u32)TAD_SOCK(reg), (u32)TAD_CH(reg), @@ -757,7 +763,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) (u32)TAD_TGT2(reg), (u32)TAD_TGT3(reg), reg); - prv = tmp_mb; + prv = limit; } /* @@ -771,9 +777,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tad_ch_nilv_offset[j], ®); tmp_mb = TAD_OFFSET(reg) >> 20; - debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n", + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", i, j, - tmp_mb / 1000, tmp_mb % 1000, + mb, kb, ((u64)tmp_mb) << 20L, reg); } @@ -795,9 +802,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = RIR_LIMIT(reg) >> 20; rir_way = 1 << RIR_WAY(reg); - debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n", + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", i, j, - tmp_mb / 1000, tmp_mb % 1000, + mb, kb, ((u64)tmp_mb) << 20L, rir_way, reg); @@ -808,9 +816,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) ®); tmp_mb = RIR_OFFSET(reg) << 6; - debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n", + mb = div_u64_rem(tmp_mb, 1000, &kb); + debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, - tmp_mb / 1000, tmp_mb % 1000, + mb, kb, ((u64)tmp_mb) << 20L, (u32)RIR_RNK_TGT(reg), reg); @@ -848,6 +857,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 ch_way,sck_way; u32 tad_offset; u32 rir_way; + u32 mb, kb; u64 ch_addr, offset, limit, prv = 0; @@ -858,7 +868,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, * range (e. g. VGA addresses). It is unlikely, however, that the * memory controller would generate an error on that range. */ - if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) { + if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; @@ -913,7 +923,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, addr, limit, sad_way + 7, - INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]"); + interleave_mode ? "" : "XOR[18:16]"); if (interleave_mode) idx = ((addr >> 6) ^ (addr >> 16)) & 7; else @@ -1053,7 +1063,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr & 0x7f; /* Remove socket wayness and remove 6 bits */ addr >>= 6; - addr /= sck_xch; + addr = div_u64(addr, sck_xch); #if 0 /* Divide by channel way */ addr = addr / ch_way; @@ -1073,10 +1083,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, continue; limit = RIR_LIMIT(reg); - - debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n", + mb = div_u64_rem(limit >> 20, 1000, &kb); + debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", n_rir, - (limit >> 20) / 1000, (limit >> 20) % 1000, + mb, kb, limit, 1 << RIR_WAY(reg)); if (ch_addr <= limit) diff --git a/include/linux/edac.h b/include/linux/edac.h index ba317e2930a1..c621d762bb2c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -70,25 +70,64 @@ enum dev_type { #define DEV_FLAG_X32 BIT(DEV_X32) #define DEV_FLAG_X64 BIT(DEV_X64) -/* memory types */ +/** + * enum mem_type - memory types. For a more detailed reference, please see + * http://en.wikipedia.org/wiki/DRAM + * + * @MEM_EMPTY Empty csrow + * @MEM_RESERVED: Reserved csrow type + * @MEM_UNKNOWN: Unknown csrow type + * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995. + * @MEM_EDO: EDO - Extended data out, used on systems up to 1998. + * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant. + * @MEM_SDR: SDR - Single data rate SDRAM + * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory + * They use 3 pins for chip select: Pins 0 and 2 are + * for rank 0; pins 1 and 3 are for rank 1, if the memory + * is dual-rank. + * @MEM_RDR: Registered SDR SDRAM + * @MEM_DDR: Double data rate SDRAM + * http://en.wikipedia.org/wiki/DDR_SDRAM + * @MEM_RDDR: Registered Double data rate SDRAM + * This is a variant of the DDR memories. + * A registered memory has a buffer inside it, hiding + * part of the memory details to the memory controller. + * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers. + * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F. + * Those memories are labed as "PC2-" instead of "PC" to + * differenciate from DDR. + * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205 + * and JESD206. + * Those memories are accessed per DIMM slot, and not by + * a chip select signal. + * @MEM_RDDR2: Registered DDR2 RAM + * This is a variant of the DDR2 memories. + * @MEM_XDR: Rambus XDR + * It is an evolution of the original RAMBUS memories, + * created to compete with DDR2. Weren't used on any + * x86 arch, but cell_edac PPC memory controller uses it. + * @MEM_DDR3: DDR3 RAM + * @MEM_RDDR3: Registered DDR3 RAM + * This is a variant of the DDR3 memories. + */ enum mem_type { - MEM_EMPTY = 0, /* Empty csrow */ - MEM_RESERVED, /* Reserved csrow type */ - MEM_UNKNOWN, /* Unknown csrow type */ - MEM_FPM, /* Fast page mode */ - MEM_EDO, /* Extended data out */ - MEM_BEDO, /* Burst Extended data out */ - MEM_SDR, /* Single data rate SDRAM */ - MEM_RDR, /* Registered single data rate SDRAM */ - MEM_DDR, /* Double data rate SDRAM */ - MEM_RDDR, /* Registered Double data rate SDRAM */ - MEM_RMBS, /* Rambus DRAM */ - MEM_DDR2, /* DDR2 RAM */ - MEM_FB_DDR2, /* fully buffered DDR2 */ - MEM_RDDR2, /* Registered DDR2 RAM */ - MEM_XDR, /* Rambus XDR */ - MEM_DDR3, /* DDR3 RAM */ - MEM_RDDR3, /* Registered DDR3 RAM */ + MEM_EMPTY = 0, + MEM_RESERVED, + MEM_UNKNOWN, + MEM_FPM, + MEM_EDO, + MEM_BEDO, + MEM_SDR, + MEM_RDR, + MEM_DDR, + MEM_RDDR, + MEM_RMBS, + MEM_DDR2, + MEM_FB_DDR2, + MEM_RDDR2, + MEM_XDR, + MEM_DDR3, + MEM_RDDR3, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -166,8 +205,9 @@ enum scrub_type { #define OP_OFFLINE 0x300 /* - * There are several things to be aware of that aren't at all obvious: + * Concepts used at the EDAC subsystem * + * There are several things to be aware of that aren't at all obvious: * * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. * @@ -176,36 +216,61 @@ enum scrub_type { * creating a common ground for discussion, terms and their definitions * will be established. * - * Memory devices: The individual chip on a memory stick. These devices - * commonly output 4 and 8 bits each. Grouping several - * of these in parallel provides 64 bits which is common - * for a memory stick. + * Memory devices: The individual DRAM chips on a memory stick. These + * devices commonly output 4 and 8 bits each (x4, x8). + * Grouping several of these in parallel provides the + * number of bits that the memory controller expects: + * typically 72 bits, in order to provide 64 bits + + * 8 bits of ECC data. * * Memory Stick: A printed circuit board that aggregates multiple - * memory devices in parallel. This is the atomic - * memory component that is purchaseable by Joe consumer - * and loaded into a memory socket. + * memory devices in parallel. In general, this is the + * Field Replaceable Unit (FRU) which gets replaced, in + * the case of excessive errors. Most often it is also + * called DIMM (Dual Inline Memory Module). + * + * Memory Socket: A physical connector on the motherboard that accepts + * a single memory stick. Also called as "slot" on several + * datasheets. * - * Socket: A physical connector on the motherboard that accepts - * a single memory stick. + * Channel: A memory controller channel, responsible to communicate + * with a group of DIMMs. Each channel has its own + * independent control (command) and data bus, and can + * be used independently or grouped with other channels. * - * Channel: Set of memory devices on a memory stick that must be - * grouped in parallel with one or more additional - * channels from other memory sticks. This parallel - * grouping of the output from multiple channels are - * necessary for the smallest granularity of memory access. - * Some memory controllers are capable of single channel - - * which means that memory sticks can be loaded - * individually. Other memory controllers are only - * capable of dual channel - which means that memory - * sticks must be loaded as pairs (see "socket set"). + * Branch: It is typically the highest hierarchy on a + * Fully-Buffered DIMM memory controller. + * Typically, it contains two channels. + * Two channels at the same branch can be used in single + * mode or in lockstep mode. + * When lockstep is enabled, the cacheline is doubled, + * but it generally brings some performance penalty. + * Also, it is generally not possible to point to just one + * memory stick when an error occurs, as the error + * correction code is calculated using two DIMMs instead + * of one. Due to that, it is capable of correcting more + * errors than on single mode. * - * Chip-select row: All of the memory devices that are selected together. - * for a single, minimum grain of memory access. - * This selects all of the parallel memory devices across - * all of the parallel channels. Common chip-select rows - * for single channel are 64 bits, for dual channel 128 - * bits. + * Single-channel: The data accessed by the memory controller is contained + * into one dimm only. E. g. if the data is 64 bits-wide, + * the data flows to the CPU using one 64 bits parallel + * access. + * Typically used with SDR, DDR, DDR2 and DDR3 memories. + * FB-DIMM and RAMBUS use a different concept for channel, + * so this concept doesn't apply there. + * + * Double-channel: The data size accessed by the memory controller is + * interlaced into two dimms, accessed at the same time. + * E. g. if the DIMM is 64 bits-wide (72 bits with ECC), + * the data flows to the CPU using a 128 bits parallel + * access. + * + * Chip-select row: This is the name of the DRAM signal used to select the + * DRAM ranks to be accessed. Common chip-select rows for + * single channel are 64 bits, for dual channel 128 bits. + * It may not be visible by the memory controller, as some + * DIMM types have a memory buffer that can hide direct + * access to it from the Memory Controller. * * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. * Motherboards commonly drive two chip-select pins to @@ -218,8 +283,8 @@ enum scrub_type { * * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. * A double-sided stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. "Double-sided" + * access different sets of memory devices. The two + * rows cannot be accessed concurrently. "Double-sided" * is irrespective of the memory devices being mounted * on both sides of the memory stick. * @@ -247,10 +312,22 @@ enum scrub_type { * PS - I enjoyed writing all that about as much as you enjoyed reading it. */ -struct channel_info { - int chan_idx; /* channel index */ - u32 ce_count; /* Correctable Errors for this CHANNEL */ - char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ +/** + * struct rank_info - contains the information for one DIMM rank + * + * @chan_idx: channel number where the rank is (typically, 0 or 1) + * @ce_count: number of correctable errors for this rank + * @label: DIMM label. Different ranks for the same DIMM should be + * filled, on userspace, with the same label. + * FIXME: The core currently won't enforce it. + * @csrow: A pointer to the chip select row structure (the parent + * structure). The location of the rank is given by + * the (csrow->csrow_idx, chan_idx) vector. + */ +struct rank_info { + int chan_idx; + u32 ce_count; + char label[EDAC_MC_LABEL_LEN + 1]; struct csrow_info *csrow; /* the parent */ }; @@ -274,7 +351,7 @@ struct csrow_info { /* channel information for this csrow */ u32 nr_channels; - struct channel_info *channels; + struct rank_info *channels; }; struct mcidev_sysfs_group { |