summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2012-05-11 11:41:45 -0300
committerMauro Carvalho Chehab <mchehab@redhat.com>2012-05-28 19:13:51 -0300
commite17a2f42a484562be48128c5b8dc9f7291e8c902 (patch)
tree67d92491b66625b20be6fcb03a39c18424adbe7e
parent5926ff502f6b93ca0c1654f8a5c5317ea236dbdb (diff)
downloadtalos-op-linux-e17a2f42a484562be48128c5b8dc9f7291e8c902.tar.gz
talos-op-linux-e17a2f42a484562be48128c5b8dc9f7291e8c902.zip
edac: Cleanup the logs for i7core and sb edac drivers
Remove some information that it is duplicated at the MCE log, and don't have much usage for the error. Those data will be added again, when creating a trace function that outputs both memory errors and MCE fields. Cc: Aristeu Rozanski <arozansk@redhat.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
-rw-r--r--drivers/edac/i7core_edac.c9
-rw-r--r--drivers/edac/sb_edac.c42
2 files changed, 20 insertions, 31 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 6d89c78a9b7a..2aacd951d41c 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1623,7 +1623,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- char *type, *optype, *err, *msg;
+ char *type, *optype, *err, msg[80];
enum hw_event_mc_err_type tp_event;
unsigned long error = m->status & 0x1ff0000l;
bool uncorrected_error = m->mcgstatus & 1ll << 61;
@@ -1701,10 +1701,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
err = "unknown";
}
- msg = kasprintf(GFP_ATOMIC,
- "addr=0x%08llx cpu=%d count=%d Err=%08llx:%08llx (%s: %s))\n",
- (long long) m->addr, m->cpu, core_err_cnt,
- (long long)m->status, (long long)m->misc, optype, err);
+ snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
/*
* Call the helper to output message
@@ -1718,8 +1715,6 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
syndrome,
channel, dimm, -1,
err, msg, m);
-
- kfree(msg);
}
/*
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 2f95a1b583dc..e834dfd034d6 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -788,7 +788,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
u8 *socket,
long *channel_mask,
u8 *rank,
- char *area_type, char *msg)
+ char **area_type, char *msg)
{
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
@@ -843,7 +843,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
sprintf(msg, "Can't discover the memory socket");
return -EINVAL;
}
- area_type = get_dram_attr(reg);
+ *area_type = get_dram_attr(reg);
interleave_mode = INTERLEAVE_MODE(reg);
pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -1342,7 +1342,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
enum hw_event_mc_err_type tp_event;
- char *type, *optype, msg[256], *recoverable_msg;
+ char *type, *optype, msg[256];
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1355,7 +1355,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
long channel_mask, first_channel;
u8 rank, socket;
int rc, dimm;
- char *area_type = "Unknown";
+ char *area_type = NULL;
if (uncorrected_error) {
if (ripv) {
@@ -1407,7 +1407,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
}
rc = get_memory_error_data(mci, m->addr, &socket,
- &channel_mask, &rank, area_type, msg);
+ &channel_mask, &rank, &area_type, msg);
if (rc < 0)
goto err_parsing;
new_mci = get_mci_for_node_id(socket);
@@ -1427,29 +1427,23 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
else
dimm = 2;
- if (uncorrected_error && recoverable)
- recoverable_msg = " recoverable";
- else
- recoverable_msg = "";
/*
- * FIXME: What should we do with "channel" information on mcelog?
- * Probably, we can just discard it, as the channel information
- * comes from the get_memory_error_data() address decoding
+ * FIXME: On some memory configurations (mirror, lockstep), the
+ * Memory Controller can't point the error to a single DIMM. The
+ * EDAC core should be handling the channel mask, in order to point
+ * to the group of dimm's where the error may be happening.
*/
snprintf(msg, sizeof(msg),
- "%d error(s)%s: %s%s: cpu=%d Err=%04x:%04x addr = 0x%08llx socket=%d Channel=%ld(mask=%ld), rank=%d\n",
- core_err_cnt,
- overflow ? " OVERFLOW" : "",
- area_type,
- recoverable_msg,
- m->cpu,
- mscod, errcode,
- (long long) m->addr,
- socket,
- first_channel, /* This is the real channel on SB */
- channel_mask,
- rank);
+ "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+ core_err_cnt,
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ area_type,
+ mscod, errcode,
+ socket,
+ channel_mask,
+ rank);
debugf0("%s", msg);
OpenPOWER on IntegriCloud