summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Zane Shelley <zshelle@us.ibm.com> 2018-04-15 20:19:44 -0500
committer Zane C. Shelley <zshelle@us.ibm.com> 2018-04-20 10:33:07 -0400
commit eaeec3734f39adf53bb4ee34579a8b303cd49b50 (patch)
tree 44ef807e07b463a91c47c49509b40fb03a281c1a /src
parent baa092dc02b4f8aae9f615ae500d9b0bf6d8c4b3 (diff)
download talos-hostboot-eaeec3734f39adf53bb4ee34579a8b303cd49b50.tar.gz
talos-hostboot-eaeec3734f39adf53bb4ee34579a8b303cd49b50.zip
PRD: Generic TpsEvent::analyzeEccErrors() function for all targets
Change-Id: I6b71f95377bc0c5ea43684d2956af9cfeb491326
RTC: 191195
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57237
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57505
CI-Ready: Zane C. Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps.H 12
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C 160
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C 4
3 files changed, 116 insertions, 60 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps.H b/src/usr/diag/prdf/plat/mem/prdfMemTps.H
index c06b759cf..22eaf5c50 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps.H
@@ -69,6 +69,18 @@ class TpsEvent : public TdEntry
private: // functions
/**
+ * @brief Does analysis on any ECC errors found during the current phase.
+ * @param i_eccAttns Mask of all currently active maintenance attentions.
+ * See enum MaintEccAttns for values.
+ * @param io_sc The step code data struct.
+ * @param o_done True if the procedure is complete or has aborted.
+ * False otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ uint32_t analyzeEccErrors( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc, bool & o_done );
+
+ /**
* @brief Starts the appropriate maintenance command based on iv_phase.
* @pre iv_phase must be set appropriately before calling this function.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
index 71cda0b65..15f880e05 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
@@ -98,66 +98,15 @@ uint32_t TpsEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc,
break;
}
- //if there was a UE or IUE
- if ( (eccAttns & MAINT_UE) || (eccAttns & MAINT_IUE) )
+ // Analyze the ECC errors, if needed.
+ o_rc = analyzeEccErrors( eccAttns, io_sc, o_done );
+ if ( SUCCESS != o_rc )
{
- PRDF_TRAC( PRDF_FUNC "UE Detected. Aborting this procedure." );
- //UE
- if ( eccAttns & MAINT_UE )
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_MaintUE );
- }
- //IUE
- else
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_MaintIUE );
- }
-
- // At this point we don't actually have an address for the UE.
- // The best we can do is get the address in which the command
- // stopped.
- MemAddr addr;
- o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- // Do memory UE handling.
- o_rc = MemEcc::handleMemUe<TYPE_MCA>(iv_chip, addr,
- UE_TABLE::SCRUB_UE, io_sc);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMemUe<T>(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- //Abort this procedure
- o_done = true;
+ PRDF_ERR( PRDF_FUNC "analyzeEccErrors() failed" );
+ break;
}
- //else if there was an MPE
- else if ( eccAttns & MAINT_MPE )
- {
- // Do memory MPE handling.
- o_rc = MemEcc::handleMpe<TYPE_MCA>( iv_chip, iv_rank,
- UE_TABLE::SCRUB_MPE, io_sc);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMpe(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
+ if ( o_done ) break; // abort the procedure.
- //Abort this procedure
- o_done = true;
- }
- else
- {
//Add the rank to the callout list
MemoryMru memmru(iv_chip->getTrgt(), iv_rank,
MemoryMruData::CALLOUT_RANK);
@@ -180,7 +129,6 @@ uint32_t TpsEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc,
//Abort this procedure
o_done = true;
}
- }
}
}while(0);
@@ -216,6 +164,102 @@ uint32_t TpsEvent<TYPE_MBA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc,
//
//##############################################################################
+template<TARGETING::TYPE T>
+bool __iueCheck( uint32_t i_eccAttns );
+
+template<> inline
+bool __iueCheck<TYPE_MCA>( uint32_t i_eccAttns )
+{
+ return ( 0 != (i_eccAttns & MAINT_IUE) );
+}
+
+template<> inline
+bool __iueCheck<TYPE_MBA>( uint32_t i_eccAttns )
+{
+ // IUES are reported via RCE ETE on Centaur
+ return ( 0 != (i_eccAttns & MAINT_RCE_ETE) );
+}
+
+template<TARGETING::TYPE T>
+uint32_t TpsEvent<T>::analyzeEccErrors( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done )
+{
+ #define PRDF_FUNC "[TpsEvent::analyzeEccErrors] "
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // At this point we don't actually have an address for any ECC errors.
+ // The best we can do is get the address in which the command stopped.
+ MemAddr addr;
+ o_rc = getMemMaintAddr<T>( iv_chip, addr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+
+ // IUEs are reported as UEs during read operations. Therefore, we will
+ // treat IUEs like UEs for these scrub operations simply to maintain
+ // consistency during all of Memory Diagnostics.
+ if ( (i_eccAttns & MAINT_UE) || __iueCheck<T>(i_eccAttns) )
+ {
+ PRDF_TRAC( PRDF_FUNC "UE Detected: 0x%08x,0x%02x",
+ iv_chip->getHuid(), getKey() );
+
+ // Add the signature to the multi-signature list. Also, since
+ // this will be a predictive callout, change the primary
+ // signature as well.
+ uint32_t sig = (i_eccAttns & MAINT_UE) ? PRDFSIG_MaintUE
+ : PRDFSIG_MaintIUE;
+ io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), sig );
+ io_sc.service_data->setSignature( iv_chip->getHuid(), sig );
+
+ o_rc = MemEcc::handleMemUe<T>( iv_chip, addr, UE_TABLE::SCRUB_UE,
+ io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "MemEcc::handleMemUe(0x%08x,0x%02x) failed",
+ iv_chip->getHuid(), getKey() );
+ break;
+ }
+
+ // Leave the mark in place and abort this procedure.
+ o_done = true; break;
+ }
+ else if ( i_eccAttns & MAINT_MPE )
+ {
+ PRDF_TRAC( PRDF_FUNC "MPE Detected: 0x%08x,0x%02x",
+ iv_chip->getHuid(), getKey() );
+
+ io_sc.service_data->AddSignatureList( iv_chip->getTrgt(),
+ PRDFSIG_MaintMPE );
+
+ o_rc = MemEcc::handleMpe<T>( iv_chip, addr, UE_TABLE::SCRUB_MPE,
+ io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "MemEcc::handleMpe(0x%08x,0x%02x) failed",
+ iv_chip->getHuid(), getKey() );
+ break;
+ }
+
+ // Leave the mark in place and abort this procedure.
+ o_done = true; break;
+ }
+
+ } while(0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
template <TARGETING::TYPE T>
uint32_t TpsEvent<T>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc )
{
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
index 2d9458815..69e8a26e6 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
@@ -125,13 +125,13 @@ uint32_t VcmEvent<T>::cleanup( STEP_CODE_DATA_STRUCT & io_sc )
template<TARGETING::TYPE T>
bool __iueCheck( uint32_t i_eccAttns );
-template<>
+template<> inline
bool __iueCheck<TYPE_MCA>( uint32_t i_eccAttns )
{
return ( 0 != (i_eccAttns & MAINT_IUE) );
}
-template<>
+template<> inline
bool __iueCheck<TYPE_MBA>( uint32_t i_eccAttns )
{
// IUES are reported via RCE ETE on Centaur
OpenPOWER on IntegriCloud