summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf
diff options
context:
space:
mode:
authorCaleb Palmer <cnpalmer@us.ibm.com>2017-03-29 15:44:39 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2017-04-10 12:05:03 -0400
commit4a0692811d82840b2c6eed9c892c9a866586b989 (patch)
tree36821da007432dd8b7b22c04c2899165b3dced2c /src/usr/diag/prdf
parent6e5d170315054ed04c7a3ba7144990fe3860e882 (diff)
downloadtalos-hostboot-4a0692811d82840b2c6eed9c892c9a866586b989.tar.gz
talos-hostboot-4a0692811d82840b2c6eed9c892c9a866586b989.zip
PRD: Add support for memory IMPE analysis
Change-Id: Icb2e6b8f4c87c38a47f46ecb5d97d36533296ba9 RTC: 165384 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38902 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39002 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C107
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H9
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C21
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/mem/prdfMemThresholds.H5
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H6
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C15
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca.rule4
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule4
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule7
9 files changed, 173 insertions, 5 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index d2746f138..491b1f63b 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -832,6 +832,113 @@ uint32_t analyzeMaintIue<TYPE_MCA, McaDataBundle*>(ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
+/**
+ * Handles an IMPE attention on the given chip.
+ *
+ * Reads the mark shadow register (MSR) to find the failing rank and DRAM,
+ * calls out the rank, makes the error log predictive when more than one DRAM
+ * on the rank has reported an IMPE inside the current threshold interval, and
+ * writes a chip mark once the per-rank threshold is reached.
+ *
+ * All of the analysis is compiled only for __HOSTBOOT_MODULE; otherwise the
+ * function just returns SUCCESS.
+ *
+ * @param  i_chip Chip under analysis; its type must equal T (asserted).
+ * @param  io_sc  The step code data struct.
+ * @return Non-SUCCESS if the MSR read or the chip mark write fails, SUCCESS
+ *         otherwise.
+ */
+template<TARGETING::TYPE T, typename D>
+uint32_t analyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
+{
+
+    #define PRDF_FUNC "[MemEcc::analyzeImpe] "
+
+    PRDF_ASSERT( T == i_chip->getType() );
+
+    uint32_t o_rc = SUCCESS;
+
+    #ifdef __HOSTBOOT_MODULE
+
+    do
+    {
+        // get data bundle from chip
+        D db = static_cast<D>( i_chip->getDataBundle() );
+
+        // get the mark shadow register
+        SCAN_COMM_REGISTER_CLASS * msr = i_chip->getRegister("MSR");
+
+        o_rc = msr->Read();
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "Read() failed on MSR: i_chip=0x%08x",
+                      i_chip->getHuid() );
+            break;
+        }
+
+        TargetHandle_t trgt = i_chip->getTrgt();
+
+        // get galois field code - bits 8:15 of MSR
+        uint8_t galois = msr->GetBitFieldJustified( 8, 8 );
+
+        // get rank - bits 16:18 of MSR
+        uint8_t mrnk = msr->GetBitFieldJustified( 16, 3 );
+        MemRank rank( mrnk );
+
+        // get symbol and DRAM
+        MemSymbol symbol = MemSymbol::fromGalois( trgt, rank, galois );
+        uint8_t dram = symbol.getDram();
+
+        // Add the DIMM to the callout list
+        MemoryMru memmru( trgt, rank, MemoryMruData::CALLOUT_RANK );
+        io_sc.service_data->SetCallout( memmru );
+
+        // Initialize threshold if it doesn't exist yet. This must happen
+        // before any operator[] access on the map: operator[] inserts a
+        // default-constructed entry, after which count() would never be 0
+        // and the IMPE threshold policy would never be applied.
+        if ( 0 == db->iv_impeThMap.count(rank) )
+        {
+            db->iv_impeThMap[rank] = TimeBasedThreshold( getImpeTh() );
+        }
+
+        // Look the per-rank entries up once and reuse them below.
+        auto & impeTh    = db->iv_impeThMap[rank];
+        auto & impeDrams = db->iv_impeDramMap[rank];
+
+        // if at any point there is more than one dram reporting an IMPE on a
+        // rank within the timebase of the threshold we make the error log
+        // predictive
+
+        // clear our vector of drams if the threshold time has elapsed
+        if ( impeTh.timeElapsed(io_sc) )
+        {
+            impeDrams.clear();
+        }
+
+        // if this DRAM hasn't already reported an IMPE on this rank
+        if ( std::find( impeDrams.begin(), impeDrams.end(), dram ) ==
+             impeDrams.end() )
+        {
+            // if there is another DRAM reporting an IMPE on this rank as well
+            if ( !impeDrams.empty() )
+            {
+                // Make the error log predictive
+                io_sc.service_data->setServiceCall();
+            }
+
+            // add the DRAM to the map
+            impeDrams.push_back( dram );
+        }
+
+        // increment count for the given rank - check if at threshold
+        if ( impeTh.inc(io_sc) )
+        {
+            // place a chip mark on the failing DRAM
+            MemMark chipMark( trgt, rank, galois );
+            o_rc = MarkStore::writeChipMark<T>( i_chip, rank, chipMark );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "MarkStore::writeChipMark(0x%08x,m%ds%d) "
+                          "failed", i_chip->getHuid(), rank.getMaster(),
+                          rank.getSlave() );
+                break;
+            }
+        }
+
+    }while(0);
+
+    #endif
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+// Explicit instantiation for the MCA target with its data bundle.
+template
+uint32_t analyzeImpe<TYPE_MCA, McaDataBundle*>( ExtensibleChip * i_chip,
+                                                STEP_CODE_DATA_STRUCT & io_sc );
+
+//------------------------------------------------------------------------------
+
} // end namespace MemEcc
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
index 4855abeb6..1ddc31d55 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
@@ -120,6 +120,15 @@ template<TARGETING::TYPE T, typename D>
uint32_t analyzeMaintIue( ExtensibleChip * i_chip,
STEP_CODE_DATA_STRUCT & io_sc );
+/**
+ * @brief Analyzes a maint or mainline IMPE attention.
+ * @tparam T Target type of i_chip; the implementation asserts that it matches
+ *           i_chip->getType().
+ * @tparam D Data bundle pointer type; the chip's data bundle is static_cast
+ *           to this type.
+ * @param i_chip MCA or MBA.
+ * @param io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T, typename D>
+uint32_t analyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc );
+
#ifdef __HOSTBOOT_RUNTIME
/**
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C
index 136635d94..e0cc1790f 100755
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C
@@ -46,6 +46,7 @@ using namespace PlatServices;
enum DefaultThresholds
{
MCA_RCD_PARITY_NON_MNFG_TH = 32, ///< Non-MNFG RCD parity error TH
+ MCA_IMPE_NON_MNFG_TH = 32, ///< Non-MNFG IMPE TH
MCA_IUE_NON_MNFG_TH = 8, ///< Non-MNFG IUE TH
MBA_RCE_NON_MNFG_TH = 8, ///< Non-MNFG RCE TH
MBA_SCRUB_CE_NON_MNFG_TH = 80, ///< Non-MNFG Scrub soft/inter CE TH
@@ -114,6 +115,26 @@ ThresholdResolution::ThresholdPolicy getIueTh()
//------------------------------------------------------------------------------
+#ifdef __HOSTBOOT_MODULE
+
+/**
+ * Builds the IMPE threshold policy: the field default count, or the
+ * manufacturing threshold when in mfg mode, over a one day interval.
+ */
+ThresholdResolution::ThresholdPolicy getImpeTh()
+{
+    // Outside of manufacturing mode the built-in default applies.
+    if ( !mfgMode() )
+    {
+        uint32_t fieldTh = MCA_IMPE_NON_MNFG_TH;
+        return ThresholdResolution::ThresholdPolicy( fieldTh,
+                                                ThresholdResolution::ONE_DAY );
+    }
+
+    // Manufacturing mode: take the count from the mfg threshold manager.
+    uint32_t mnfgTh = MfgThresholdMgr::getInstance()->
+                            getThreshold( ATTR_MNFG_TH_MEMORY_IMPES );
+
+    return ThresholdResolution::ThresholdPolicy( mnfgTh,
+                                                ThresholdResolution::ONE_DAY );
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+
ThresholdResolution::ThresholdPolicy getRceThreshold()
{
uint32_t th = MBA_RCE_NON_MNFG_TH;
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.H b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.H
index 35acf95cf..1c3769ea1 100755
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.H
@@ -65,6 +65,11 @@ ThresholdResolution::ThresholdPolicy getRcdParityTh();
*/
ThresholdResolution::ThresholdPolicy getIueTh();
+/**
+ * @brief Returns IMPE threshold policy.
+ * @return A one day policy whose count is MCA_IMPE_NON_MNFG_TH, or the
+ *         ATTR_MNFG_TH_MEMORY_IMPES manufacturing threshold in mfg mode.
+ */
+ThresholdResolution::ThresholdPolicy getImpeTh();
+
#endif
/**
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
index 9dd9f632a..ed505c1d7 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
@@ -123,6 +123,12 @@ class McaDataBundle : public DataBundle
/** Threshold table for IUEs. Threshold per DIMM */
std::map<uint8_t, TimeBasedThreshold> iv_iueTh;
+ /** Threshold table for IMPEs. Threshold per rank */
+ std::map<MemRank, TimeBasedThreshold> iv_impeThMap;
+
+ /** Map to keep track of which DRAMs on which ranks have reported IMPEs */
+ std::map<MemRank, std::vector<uint8_t>> iv_impeDramMap;
+
#endif
};
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
index 6a064bf87..cac225d23 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
@@ -257,6 +257,21 @@ PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeMaintIue );
//------------------------------------------------------------------------------
+/**
+ * @brief  Plugin for MCAECCFIR[19,39] - mainline and maintenance IMPEs.
+ * @param  i_chip MCA chip.
+ * @param  io_sc  The step code data struct.
+ * @return SUCCESS always; the analysis return code is not passed on.
+ */
+int32_t AnalyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
+{
+    // The rule code has no use for the analysis return code, so it is
+    // dropped here on purpose.
+    static_cast<void>(
+        MemEcc::analyzeImpe<TYPE_MCA, McaDataBundle *>( i_chip, io_sc ) );
+
+    return SUCCESS;
+}
+PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeImpe );
+
+//------------------------------------------------------------------------------
+
} // end namespace p9_mca
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
index 4f54882b0..cb7b1b83b 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
@@ -424,7 +424,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[19]
* Mainline read IMPE
*/
- (rMCAECCFIR, bit(19)) ? mainline_impe_handling;
+ (rMCAECCFIR, bit(19)) ? impe_handling;
/** MCAECCFIR[20:27]
* Maintenance MPE
@@ -489,7 +489,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[39]
* Maintenance IMPE
*/
- (rMCAECCFIR, bit(39)) ? maintenance_impe_handling;
+ (rMCAECCFIR, bit(39)) ? impe_handling;
/** MCAECCFIR[40]
* spare
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
index d3ebc436e..95d591c49 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
@@ -85,8 +85,6 @@ actionclass mainline_iue_handling
funccall("AnalyzeMainlineIue");
};
-actionclass mainline_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384
-
/** Handle Maintenance IUEs */
actionclass maintenance_iue_handling
{
@@ -95,7 +93,7 @@ actionclass maintenance_iue_handling
funccall("AnalyzeMaintIue");
};
-actionclass maintenance_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384
+actionclass impe_handling { funccall("AnalyzeImpe"); }; # Used for both mainline (MCAECCFIR[19]) and maintenance (MCAECCFIR[39]) IMPEs
/** MCA/UE algorithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
index bf2fd3fd1..d146517d1 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
@@ -220,3 +220,10 @@
capture group default;
};
+ register MSR # Read by MemEcc::analyzeImpe(): bits 8:15 = Galois field code, bits 16:18 = rank
+ {
+ name "P9 Mark Shadow Register";
+ scomaddr 0x07010A0C;
+ capture group default;
+ };
+
OpenPOWER on IntegriCloud