diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2019-04-05 08:26:33 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2019-04-10 10:09:13 -0500 |
commit | 8941c2cd6251da7e090ef18201f6e3a0a8f5a597 (patch) | |
tree | a8896f63e6182c5134843e8997f39769d0d1991d | |
parent | 6fc227ed83cee3c349e264dd9f5104c79b78f17f (diff) | |
download | talos-hostboot-8941c2cd6251da7e090ef18201f6e3a0a8f5a597.tar.gz talos-hostboot-8941c2cd6251da7e090ef18201f6e3a0a8f5a597.zip |
PRD: NVDIMM mask EVENT_N bit on persistency lost
Change-Id: I49d56221729e9a1a5e63544527c10a0f48a81d4a
CQ: SW461975
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/75604
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Paul Greenwood <paul.greenwood@ibm.com>
Reviewed-by: Benjamen G. Tyner <ben.tyner@ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/75793
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rw-r--r-- | src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule | 2 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfP9Mca.C | 21 |
2 files changed, 19 insertions, 4 deletions
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
index 09f85eef0..da3a73f82 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
@@ -36,8 +36,8 @@ actionclass verify_chip_mark_7 { funccall("AnalyzeFetchMpe_7"); };
 /** Analyze NVDIMM Health Registers */
 actionclass analyzeNvdimms
 {
-    funccall("AnalyzeNvdimmHealthStatRegs");
     threshold32pday;
+    funccall("AnalyzeNvdimmHealthStatRegs");
 };
 
 /** Mainline NCE/TCE handling */
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 1722314f1..5f7efa274 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -775,8 +775,12 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
         // BIT 0: Persistency Lost
         if ( bitList.count(0) )
         {
-            // Make the log predictive
-            io_sc.service_data->setServiceCall();
+            // EVENT_N cannot be retriggered on a new PERSISTENCY_LOST_ERROR
+            // if a previous PERSISTENCY_LOST_ERROR still exists. Meaning, we
+            // cannot detect/report multiple errors that happen at different
+            // points in time. As such, mask the EVENT_N bit here (MCACALFIR[8])
+            // and make the log predictive.
+            io_sc.service_data->SetThresholdMaskId(0);
 
             // Send persistency lost message to PHYP
             l_rc = PlatServices::nvdimmNotifyPhypProtChange( dimm,
@@ -796,8 +800,19 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
         // BIT 2: Persistency Restored
         if ( bitList.count(2) )
         {
-            // hidden log
+            // It would be rare to have an intermittent error that comes and
+            // goes so fast we only see PERSISTENCY_RESTORED and not
+            // PERSISTENCY_LOST_ERROR. Set predictive on threshold of 32
+            // per day (rule code handles the thresholding), else just keep
+            // as a hidden log.
             io_sc.service_data->AddSignatureList( dimm, PRDFSIG_NvdimmPersRes );
+
+            // callout NVDIMM high, cable high, BPM high, no gard
+            io_sc.service_data->SetCallout( dimm, MRU_HIGH, NO_GARD );
+            l_rc = __addBpmCallout( dimm, HWAS::SRCI_PRIORITY_HIGH );
+            if ( SUCCESS != l_rc ) continue;
+            l_rc = __addNvdimmCableCallout( HWAS::SRCI_PRIORITY_HIGH );
+            if ( SUCCESS != l_rc ) continue;
         }
         // BIT 3: Below Warning Threshold -- ignore
         // BIT 4: Hardware Failure -- ignore