summary refs log tree commit diff stats
path: root/src/usr/diag/prdf
diff options
context:
space:
mode:
author Caleb Palmer <cnpalmer@us.ibm.com> 2019-09-17 16:50:20 -0500
committer Zane C Shelley <zshelle@us.ibm.com> 2019-09-18 19:58:02 -0500
commit bfe56382ff15dd94196a45f9619e8b629740d5fa (patch)
tree f0f9485f68c17ae31d6477c79648f84cf4feb594 /src/usr/diag/prdf
parent feef0389e6acfe3d57674564212df6b0760a4e0b (diff)
download talos-hostboot-bfe56382ff15dd94196a45f9619e8b629740d5fa.tar.gz
talos-hostboot-bfe56382ff15dd94196a45f9619e8b629740d5fa.zip
PRD: NVDIMM keep log hidden if no error found during analysis
Change-Id: I56d3444b2b9a87e34ce03da9a7a805cdc629d573
CQ: SW476229
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83930
Reviewed-by: Paul Greenwood <paul.greenwood@ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J Stegmiller <bjs@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H 1
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9Mca.C 56
2 files changed, 39 insertions, 18 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
index 7f078957b..05cf5d0d8 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
@@ -111,6 +111,7 @@ PRDR_ERROR_SIGNATURE(EsTmpWarnLow, 0xffff0098, "", "NVDIMM Energy Source Temper
PRDR_ERROR_SIGNATURE(BelowWarnTh, 0xffff0099, "", "NVDIMM Below Warning Threshold");
PRDR_ERROR_SIGNATURE(IntNvdimmErr, 0xffff009A, "", "NVDIMM Intermittent error");
PRDR_ERROR_SIGNATURE(NotifStatErr, 0xffff009B, "", "NVDIMM Set Event Notification Status Error");
+PRDR_ERROR_SIGNATURE(FirEvntGone, 0xffff009C, "", "NVDIMM Event Triggering the FIR no longer present");
#endif // __prdfMemExtraSig_H
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index c7a16e60f..19e90c01b 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -1072,9 +1072,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 2: ES_TEMP_WARNING
if ( bitList.count(2) )
{
- // Make the log predictive and mask the FIR.
- io_sc.service_data->SetThresholdMaskId(0);
-
// Read the ES_TEMP and ES_TEMP_WARNING_HIGH_THRESHOLD values
uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1;
uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0;
@@ -1116,6 +1113,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// Callout NVDIMM low, no gard
io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );
+ // Make the log predictive and mask the FIR.
+ io_sc.service_data->SetThresholdMaskId(0);
+
// Send message to PHYP that save/restore may work
o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm,
NVDIMM::NVDIMM_RISKY_HW_ERROR );
@@ -1126,9 +1126,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 0: NVM_LIFETIME_WARNING
if ( bitList.count(0) )
{
- // Make the log predictive, but do not mask the FIR
- io_sc.service_data->setServiceCall();
-
// Adjust warning threshold.
uint16_t warnThReg = NVDIMM::i2cReg::NVM_LIFETIME_WARNING_THRESHOLD;
uint16_t errThReg = NVDIMM::i2cReg::NVM_LIFETIME_ERROR_THRESHOLD;
@@ -1138,6 +1135,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
firstWarn, statusErr );
if ( SUCCESS != o_rc ) break;
+ // Make the log predictive, but do not mask the FIR
+ io_sc.service_data->setServiceCall();
+
// If we got a set event notification status error, add the
// signature for that before adding the signature for the warning.
// Also do not take our normal callout action since we already will
@@ -1174,9 +1174,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 1: ES_LIFETIME_WARNING
if ( bitList.count(1) )
{
- // Make the log predictive, but do not mask the FIR
- io_sc.service_data->setServiceCall();
-
// Adjust warning threshold.
uint16_t warnThReg = NVDIMM::i2cReg::ES_LIFETIME_WARNING_THRESHOLD;
uint16_t errThReg = NVDIMM::i2cReg::ES_LIFETIME_ERROR_THRESHOLD;
@@ -1186,6 +1183,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
firstWarn, statusErr );
if ( SUCCESS != o_rc ) break;
+ // Make the log predictive, but do not mask the FIR
+ io_sc.service_data->setServiceCall();
+
// If we got a set event notification status error, add the
// signature for that before adding the signature for the warning.
// Also do not take our normal callout action since we already will
@@ -1342,6 +1342,25 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
// BIT 0: Persistency Lost
if ( bitList.count(0) )
{
+ // Analyze Health Status0 Reg, Health Status1 Reg,
+ // and Error Threshold Status Reg
+ l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound );
+ if ( SUCCESS != l_rc ) continue;
+ l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound );
+ if ( SUCCESS != l_rc ) continue;
+ l_rc = __analyzeErrorThrStatusReg( io_sc, dimm, errFound );
+ if ( SUCCESS != l_rc ) continue;
+
+ // If we didn't find any error, then keep the log hidden.
+ if ( !errFound )
+ {
+ io_sc.service_data->setSignature( i_chip->getHuid(),
+ PRDFSIG_FirEvntGone );
+ // Callout NVDIMM
+ io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+ continue;
+ }
+
// EVENT_N cannot be retriggered on a new PERSISTENCY_LOST_ERROR
// if a previous PERSISTENCY_LOST_ERROR still exists. Meaning, we
// cannot detect/report multiple errors that happen at different
@@ -1351,23 +1370,24 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
// Send message to PHYP that save/restore may work
l_rc = PlatServices::nvdimmNotifyProtChange( dimm,
- NVDIMM::NVDIMM_RISKY_HW_ERROR );
+ NVDIMM::NVDIMM_RISKY_HW_ERROR );
if ( SUCCESS != l_rc ) continue;
- // Analyze Health Status0 Reg, Health Status1 Reg,
- // and Error Theshold Status Reg
- l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound );
- if ( SUCCESS != l_rc ) continue;
- l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound );
- if ( SUCCESS != l_rc ) continue;
- l_rc = __analyzeErrorThrStatusReg( io_sc, dimm, errFound );
- if ( SUCCESS != l_rc ) continue;
}
// BIT 1: Warning Threshold Exceeded
else if ( bitList.count(1) )
{
l_rc = __analyzeWarningThrStatusReg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
+
+ if ( !errFound )
+ {
+ io_sc.service_data->setSignature( i_chip->getHuid(),
+ PRDFSIG_FirEvntGone );
+ // Callout NVDIMM
+ io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
+ continue;
+ }
}
// BIT 2: Persistency Restored
else if ( bitList.count(2) )
OpenPOWER on IntegriCloud