summary | refs | log | tree | commit | diff | stats
path: root/src/usr/diag/prdf/plat
diff options
context:
space:
mode:
author: Caleb Palmer <cnpalmer@us.ibm.com> 2017-08-07 14:31:12 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2017-08-18 10:50:09 -0400
commit: a6109ca9369683e80c2d3e20bd3e1435c594b25f (patch)
tree: c8b488ae50c275638c884fc4ae9440400f8b42f2 /src/usr/diag/prdf/plat
parent: acfd9f12bd91cdb7cb295f6633fe590b67c31ceb (diff)
download: talos-hostboot-a6109ca9369683e80c2d3e20bd3e1435c594b25f.tar.gz
download: talos-hostboot-a6109ca9369683e80c2d3e20bd3e1435c594b25f.zip
PRD: TPS Analyze ECC attentions
Change-Id: I2326623087bf8d8b1c6d3a4d79472837e5c5857e
RTC: 171914
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44368
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44765
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat')
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C 21
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C 21
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps.H 12
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C 138
4 files changed, 152 insertions, 40 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
index 40bb9df83..64719bf5f 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
@@ -181,28 +181,13 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue,
{
io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintMPE );
- // Read the chip mark from markstore.
- MemMark chipMark;
- o_rc = MarkStore::readChipMark<T>( i_chip, rank, chipMark );
+ o_rc = MemEcc::handleMpe<T,D>( i_chip, rank, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,%d) failed",
- huid, rank.getMaster() );
+ PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed",
+ i_chip->getHuid(), rank.getKey() );
break;
}
-
- // If the chip mark is not valid, then somehow the chip mark was
- // placed on a rank other than the rank in which the command
- // stopped. This would most likely be a code bug.
- PRDF_ASSERT( chipMark.isValid() );
-
- // Add the mark to the callout list.
- MemoryMru mm { trgt, rank, chipMark.getSymbol() };
- io_sc.service_data->SetCallout( mm );
-
- // Add a new VCM procedure to the queue.
- TdEntry * e = new VcmEvent<T>{ i_chip, rank, chipMark };
- io_queue.push( e );
}
else if ( isMfgCeCheckingEnabled() &&
(0 != (eccAttns & MAINT_HARD_NCE_ETE)) )
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index 069cd9ca6..8741c183d 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -664,28 +664,13 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue,
D db = static_cast<D>(i_chip->getDataBundle());
db->iv_ueTable.addEntry( UE_TABLE::SCRUB_MPE, i_addr );
- // Read the chip mark from markstore.
- MemMark chipMark;
- o_rc = MarkStore::readChipMark<T>( i_chip, rank, chipMark );
+ o_rc = MemEcc::handleMpe<T,D>( i_chip, rank, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,%d) failed",
- huid, rank.getMaster() );
+ PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed",
+ i_chip->getHuid(), rank.getKey() );
break;
}
-
- // If the chip mark is not valid, then somehow the chip mark was
- // placed on a rank other than the rank in which the command
- // stopped. This would most likely be a code bug.
- PRDF_ASSERT( chipMark.isValid() );
-
- // Add the mark to the callout list.
- MemoryMru mm { trgt, rank, chipMark.getSymbol() };
- io_sc.service_data->SetCallout( mm );
-
- // Add a VCM procedure to the queue.
- TdEntry * e = new VcmEvent<T>{ i_chip, rank, chipMark };
- io_queue.push( e );
}
if ( 0 != (eccAttns & MAINT_RCE_ETE) )
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps.H b/src/usr/diag/prdf/plat/mem/prdfMemTps.H
index 478cf9c47..360cb55c5 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps.H
@@ -78,6 +78,18 @@ class TpsEvent : public TdEntry
*/
uint32_t analyzeTpsPhase1_rt(STEP_CODE_DATA_STRUCT & io_sc, bool & o_done);
+ /**
+ * @brief Does isolation for the ECC attentions found while running TPS.
+ * @param i_eccAttns Mask of all currently active maintenance attentions.
+ * See enum MaintEccAttns for values.
+ * @param io_sc The step code data struct (updated by the analysis).
+ * @param o_done Set to true if the procedure is complete or has aborted. The TYPE_MCA implementation does not write it otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ uint32_t analyzeEcc( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done );
+
#endif // __HOSTBOOT_RUNTIME
};
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
index d5af05261..c0172a9cd 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
@@ -26,16 +26,21 @@
/** @file prdfMemTps_rt.C */
// Platform includes
+#include <prdfMemEccAnalysis.H>
+#include <prdfMemScrubUtils.H>
#include <prdfMemTdFalseAlarm.H>
#include <prdfMemTps.H>
#include <prdfP9McaExtraSig.H>
#include <prdfP9McaDataBundle.H>
+#include <prdfTargetServices.H>
using namespace TARGETING;
namespace PRDF
{
+using namespace PlatServices;
+
//------------------------------------------------------------------------------
template <TARGETING::TYPE T>
@@ -86,11 +91,32 @@ uint32_t TpsEvent<T>::analyzeTpsPhase1_rt( STEP_CODE_DATA_STRUCT & io_sc,
uint32_t o_rc = SUCCESS;
// TODO RTC 171914
- // Analyze Ecc Attentions
- // Analyze CEs
+ do
+ {
+ // Analyze Ecc Attentions
+ uint32_t eccAttns;
+ o_rc = checkEccFirs<T>( iv_chip, eccAttns );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
- o_done = true;
- PRDF_ERR( PRDF_FUNC "function not implemented yet" );
+ o_rc = analyzeEcc( eccAttns, io_sc, o_done );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "analyzeEcc() failed." );
+ break;
+ }
+ if ( o_done ) break;
+
+ // Analyze CEs
+
+ o_done = true;
+ PRDF_ERR( PRDF_FUNC "function not implemented yet" );
+
+ }while(0);
return o_rc;
@@ -99,6 +125,110 @@ uint32_t TpsEvent<T>::analyzeTpsPhase1_rt( STEP_CODE_DATA_STRUCT & io_sc,
//------------------------------------------------------------------------------
+template <>
+uint32_t TpsEvent<TYPE_MCA>::analyzeEcc( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done )
+{
+ #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeEcc] "
+ // Examines i_eccAttns for MAINT_UE, then MAINT_IUE (only when mfgMode() is true), then MAINT_MPE, in that order. o_done is only ever set to true here; it is never cleared.
+ uint32_t o_rc = SUCCESS;
+ // Single-pass do/while(0): every 'break' below jumps to the common return at the bottom.
+ do
+ {
+ // If there was a UE (checked first; every path through this branch ends in a break).
+ if ( i_eccAttns & MAINT_UE )
+ {
+ PRDF_TRAC( PRDF_FUNC "UE Detected: 0x%08x,0x%02x",
+ iv_chip->getHuid(), getKey() );
+
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_MaintUE );
+
+ // At this point we don't actually have an address for the UE. The
+ // best we can do is get the address in which the command stopped.
+ MemAddr addr;
+ o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+ // Hand the stop address to the UE handler, tagged as UE_TABLE::SCRUB_UE.
+ o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr,
+ UE_TABLE::SCRUB_UE, io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed",
+ iv_chip->getHuid(), getKey() );
+ break;
+ }
+
+ // Abort this procedure because additional repairs will likely
+ // not help (also avoids complication of having UE and MPE at
+ // the same time).
+ o_done = true; break;
+ }
+ // Reached only when no UE was flagged, because the UE branch above always breaks.
+ // If there was an IUE (MNFG only: the IUE bit is ignored here unless mfgMode() is true).
+ if ( mfgMode() && (i_eccAttns & MAINT_IUE) )
+ {
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_MaintIUE );
+
+ o_rc = MemEcc::handleMemIue<TYPE_MCA, McaDataBundle *>( iv_chip,
+ iv_rank,
+ io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed",
+ iv_chip->getHuid(), getKey() );
+ break;
+ }
+
+ // If service call is set, then IUE threshold was reached. NOTE(review): presumably the flag is set by handleMemIue(); confirm it cannot already be set on io_sc from earlier analysis.
+ if ( io_sc.service_data->queryServiceCall() )
+ {
+ PRDF_TRAC( PRDF_FUNC "IUE threshold detected: 0x%08x,0x%02x",
+ iv_chip->getHuid(), getKey() );
+
+ // Abort this procedure because port failure will be triggered
+ // after analysis is complete.
+ o_done = true; break;
+ }
+ }
+ // An IUE that did not raise a service call falls through, so the MPE check below still runs.
+ // If there was an MPE.
+ if ( i_eccAttns & MAINT_MPE )
+ {
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_MaintMPE );
+
+ o_rc = MemEcc::handleMpe<TYPE_MCA, McaDataBundle *>( iv_chip,
+ iv_rank, io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed",
+ iv_chip->getHuid(), iv_rank.getKey() );
+ break;
+ }
+ // NOTE(review): the trace above prints iv_rank.getKey() where the other traces in this function print getKey() -- confirm the two are equivalent.
+ // Abort this procedure because the chip mark may have fixed the
+ // symbol that triggered TPS
+ o_done = true; break;
+ }
+
+ }while(0);
+ // o_rc is still SUCCESS unless one of the helper calls above returned a failure.
+ return o_rc;
+
+ #undef PRDF_FUNC
+
+}
+
+//------------------------------------------------------------------------------
+
// TODO: RTC 171914 Actual implementation of this procedure will be done later.
template<>
uint32_t TpsEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc,
OpenPOWER on IntegriCloud