diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2017-08-07 14:31:12 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-08-18 10:50:09 -0400 |
commit | a6109ca9369683e80c2d3e20bd3e1435c594b25f (patch) | |
tree | c8b488ae50c275638c884fc4ae9440400f8b42f2 /src/usr/diag/prdf/plat | |
parent | acfd9f12bd91cdb7cb295f6633fe590b67c31ceb (diff) | |
download | talos-hostboot-a6109ca9369683e80c2d3e20bd3e1435c594b25f.tar.gz talos-hostboot-a6109ca9369683e80c2d3e20bd3e1435c594b25f.zip |
PRD: TPS Analyze ECC attentions
Change-Id: I2326623087bf8d8b1c6d3a4d79472837e5c5857e
RTC: 171914
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44368
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44765
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C | 21 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C | 21 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTps.H | 12 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C | 138 |
4 files changed, 152 insertions, 40 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C index 40bb9df83..64719bf5f 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C @@ -181,28 +181,13 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue, { io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintMPE ); - // Read the chip mark from markstore. - MemMark chipMark; - o_rc = MarkStore::readChipMark<T>( i_chip, rank, chipMark ); + o_rc = MemEcc::handleMpe<T,D>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,%d) failed", - huid, rank.getMaster() ); + PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); break; } - - // If the chip mark is not valid, then somehow the chip mark was - // placed on a rank other than the rank in which the command - // stopped. This would most likely be a code bug. - PRDF_ASSERT( chipMark.isValid() ); - - // Add the mark to the callout list. - MemoryMru mm { trgt, rank, chipMark.getSymbol() }; - io_sc.service_data->SetCallout( mm ); - - // Add a new VCM procedure to the queue. - TdEntry * e = new VcmEvent<T>{ i_chip, rank, chipMark }; - io_queue.push( e ); } else if ( isMfgCeCheckingEnabled() && (0 != (eccAttns & MAINT_HARD_NCE_ETE)) ) diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C index 069cd9ca6..8741c183d 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C @@ -664,28 +664,13 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue, D db = static_cast<D>(i_chip->getDataBundle()); db->iv_ueTable.addEntry( UE_TABLE::SCRUB_MPE, i_addr ); - // Read the chip mark from markstore. - MemMark chipMark; - o_rc = MarkStore::readChipMark<T>( i_chip, rank, chipMark ); + o_rc = MemEcc::handleMpe<T,D>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,%d) failed", - huid, rank.getMaster() ); + PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); break; } - - // If the chip mark is not valid, then somehow the chip mark was - // placed on a rank other than the rank in which the command - // stopped. This would most likely be a code bug. - PRDF_ASSERT( chipMark.isValid() ); - - // Add the mark to the callout list. - MemoryMru mm { trgt, rank, chipMark.getSymbol() }; - io_sc.service_data->SetCallout( mm ); - - // Add a VCM procedure to the queue. - TdEntry * e = new VcmEvent<T>{ i_chip, rank, chipMark }; - io_queue.push( e ); } if ( 0 != (eccAttns & MAINT_RCE_ETE) ) diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps.H b/src/usr/diag/prdf/plat/mem/prdfMemTps.H index 478cf9c47..360cb55c5 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTps.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemTps.H @@ -78,6 +78,18 @@ class TpsEvent : public TdEntry */ uint32_t analyzeTpsPhase1_rt(STEP_CODE_DATA_STRUCT & io_sc, bool & o_done); + /** + * @brief Does isolation for ECC attentions. + * @param i_eccAttns Mask of all currently active maintenance attentions. + * See enum MaintEccAttns for values. + * @param io_sc The step code data struct. + * @param o_done True if the procedure is complete or has aborted. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t analyzeEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ); + #endif // __HOSTBOOT_RUNTIME }; diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C index d5af05261..c0172a9cd 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C @@ -26,16 +26,21 @@ /** @file prdfMemTps_rt.C */ // Platform includes +#include <prdfMemEccAnalysis.H> +#include <prdfMemScrubUtils.H> #include <prdfMemTdFalseAlarm.H> #include <prdfMemTps.H> #include <prdfP9McaExtraSig.H> #include <prdfP9McaDataBundle.H> +#include <prdfTargetServices.H> using namespace TARGETING; namespace PRDF { +using namespace PlatServices; + //------------------------------------------------------------------------------ template <TARGETING::TYPE T> @@ -86,11 +91,32 @@ uint32_t TpsEvent<T>::analyzeTpsPhase1_rt( STEP_CODE_DATA_STRUCT & io_sc, uint32_t o_rc = SUCCESS; // TODO RTC 171914 - // Analyze Ecc Attentions - // Analyze CEs + do + { + // Analyze Ecc Attentions + uint32_t eccAttns; + o_rc = checkEccFirs<T>( iv_chip, eccAttns ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed", + iv_chip->getHuid() ); + break; + } - o_done = true; - PRDF_ERR( PRDF_FUNC "function not implemented yet" ); + o_rc = analyzeEcc( eccAttns, io_sc, o_done ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "analyzeEcc() failed." ); + break; + } + if ( o_done ) break; + + // Analyze CEs + + o_done = true; + PRDF_ERR( PRDF_FUNC "function not implemented yet" ); + + }while(0); return o_rc; @@ -99,6 +125,110 @@ uint32_t TpsEvent<T>::analyzeTpsPhase1_rt( STEP_CODE_DATA_STRUCT & io_sc, //------------------------------------------------------------------------------ +template <> +uint32_t TpsEvent<TYPE_MCA>::analyzeEcc( const uint32_t & i_eccAttns, + STEP_CODE_DATA_STRUCT & io_sc, + bool & o_done ) +{ + #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeEcc] " + + uint32_t o_rc = SUCCESS; + + do + { + // If there was a UE. + if ( i_eccAttns & MAINT_UE ) + { + PRDF_TRAC( PRDF_FUNC "UE Detected: 0x%08x,0x%02x", + iv_chip->getHuid(), getKey() ); + + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_MaintUE ); + + // At this point we don't actually have an address for the UE. The + // best we can do is get the address in which the command stopped. + MemAddr addr; + o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr, + UE_TABLE::SCRUB_UE, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed", + iv_chip->getHuid(), getKey() ); + break; + } + + // Abort this procedure because additional repairs will likely + // not help (also avoids complication of having UE and MPE at + // the same time). + o_done = true; break; + } + + // If there was an IUE (MNFG only). + if ( mfgMode() && (i_eccAttns & MAINT_IUE) ) + { + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_MaintIUE ); + + o_rc = MemEcc::handleMemIue<TYPE_MCA, McaDataBundle *>( iv_chip, + iv_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed", + iv_chip->getHuid(), getKey() ); + break; + } + + // If service call is set, then IUE threshold was reached. + if ( io_sc.service_data->queryServiceCall() ) + { + PRDF_TRAC( PRDF_FUNC "IUE threshold detected: 0x%08x,0x%02x", + iv_chip->getHuid(), getKey() ); + + // Abort this procedure because port failure will be triggered + // after analysis is complete. + o_done = true; break; + } + } + + // If there was an MPE. + if ( i_eccAttns & MAINT_MPE ) + { + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_MaintMPE ); + + o_rc = MemEcc::handleMpe<TYPE_MCA, McaDataBundle *>( iv_chip, + iv_rank, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed", + iv_chip->getHuid(), iv_rank.getKey() ); + break; + } + + // Abort this procedure because the chip mark may have fixed the + // symbol that triggered TPS + o_done = true; break; + } + + }while(0); + + return o_rc; + + #undef PRDF_FUNC + +} + +//------------------------------------------------------------------------------ + // TODO: RTC 171914 Actual implementation of this procedure will be done later. template<> uint32_t TpsEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc, |