diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-05-14 16:01:38 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-05-18 10:39:34 -0400 |
commit | c1c584f04be0bb7b1340d013b9b9d6147ffa2960 (patch) | |
tree | 11f9d6c614364cee10e96fe840d260a0f5af2dd8 /src/usr/diag/prdf/plat | |
parent | 5370984de38992719a693391a8c19444eea80f0b (diff) | |
download | talos-hostboot-c1c584f04be0bb7b1340d013b9b9d6147ffa2960.tar.gz talos-hostboot-c1c584f04be0bb7b1340d013b9b9d6147ffa2960.zip |
PRD: scrub resume counter for MBA runtime scrub commands
Change-Id: I27153ed86a3db5ab2477d8bdd9fa9b560e8d31f6
RTC: 191647
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58810
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59011
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H | 12 |
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 152 |
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.C | 75 |
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.H | 11 |
4 files changed, 165 insertions, 85 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H index 3fbc4a895..0dc884f46 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H @@ -37,6 +37,18 @@ namespace PRDF { +/** Simple container to keep track of the number of time a scrub command has + * been resumed. */ +class ScrubResumeCounter +{ + public: + void reset() { iv_count = 0; } + void inc() { if ( 255 != iv_count ) iv_count++; } + bool atTh() const { return 16 <= iv_count; } + private: + uint8_t iv_count = 0; +}; + /** * @brief Clears the command complete and WAT workaround attentions. * @param i_chip MBA, MCA, or MCBIST. diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index fc6ca377b..c1a09b62e 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -38,6 +38,7 @@ #include <prdfTrace.H> #include <prdfAssert.h> +#include <prdfCenMbaDataBundle.H> #include <prdfMemScrubUtils.H> #include <iipServiceDataCollector.h> @@ -664,12 +665,80 @@ uint32_t startTdScrub<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - //############################################################################## //## Centaur Maintenance Command wrappers //############################################################################## +template<TARGETING::TYPE T> +uint32_t __startScrub( ExtensibleChip * i_chip, const MemRank & i_rank, + AddrRangeType i_rangeType, uint32_t i_stopCond, + mss_MaintCmd::TimeBaseSpeed i_cmdSpeed ); + +template<> +uint32_t __startScrub<TYPE_MBA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + AddrRangeType i_rangeType, + uint32_t i_stopCond, + mss_MaintCmd::TimeBaseSpeed i_cmdSpeed ) +{ + #define PRDF_FUNC "[PlatServices::__startScrub<TYPE_MBA>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( 
TYPE_MBA == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); + errlHndl_t errl = nullptr; + + do + { + #ifdef __HOSTBOOT_RUNTIME + // Starting a new command. So clear the resume counter. + getMbaDataBundle(i_chip)->iv_scrubResumeCounter.reset(); + #endif + + // Get the address range of the given rank. + fapi2::buffer<uint64_t> saddr, eaddr; + o_rc = getMemAddrRange<TYPE_MBA>( i_chip, i_rank, saddr, eaddr, + i_rangeType ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Clear all of the counters and maintenance ECC attentions. + o_rc = prepareNextCmd<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + // Start the scrub command. + mss_TimeBaseScrub cmd { fapiTrgt, saddr, eaddr, i_cmdSpeed, + i_stopCond, false }; + FAPI_INVOKE_HWP( errl, cmd.setupAndExecuteCmd ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", + i_chip->getHuid(), i_rank.getKey() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + template<> uint32_t startBgScrub<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank ) @@ -708,17 +777,6 @@ uint32_t startBgScrub<TYPE_MBA>( ExtensibleChip * i_chip, : mss_MaintCmd::BG_SCRUB; do { - // Get the first address of the given rank. - fapi2::buffer<uint64_t> saddr, eaddr; - o_rc = getMemAddrRange<TYPE_MBA>( i_chip, i_rank, saddr, eaddr, - SLAVE_RANK ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", - i_chip->getHuid(), i_rank.getKey() ); - break; - } - // Set the required thresholds for background scrubbing. 
o_rc = setBgScrubThresholds<TYPE_MBA>( i_chip, i_rank ); if ( SUCCESS != o_rc ) @@ -728,27 +786,8 @@ uint32_t startBgScrub<TYPE_MBA>( ExtensibleChip * i_chip, break; } - // Clear all of the counters and maintenance ECC attentions. - o_rc = prepareNextCmd<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - // Start the background scrub command. - mss_TimeBaseScrub cmd { fapiTrgt, saddr, eaddr, cmdSpeed, - stopCond, false }; - errlHndl_t errl = nullptr; - FAPI_INVOKE_HWP( errl, cmd.setupAndExecuteCmd ); - if ( nullptr != errl ) - { - PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", - i_chip->getHuid(), i_rank.getKey() ); - PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); - o_rc = FAIL; break; - } + o_rc = __startScrub<TYPE_MBA>( i_chip, i_rank, SLAVE_RANK, stopCond, + cmdSpeed ); } while (0); @@ -774,8 +813,6 @@ uint32_t startTdScrub<TYPE_MBA>( ExtensibleChip * i_chip, PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); - uint32_t o_rc = SUCCESS; - // Make sure there is a command complete attention when the command stops. i_stopCond |= mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION; @@ -811,47 +848,8 @@ uint32_t startTdScrub<TYPE_MBA>( ExtensibleChip * i_chip, #endif - do - { - // Get the address range of the given rank. - fapi2::buffer<uint64_t> saddr, eaddr; - o_rc = getMemAddrRange<TYPE_MBA>( i_chip, i_rank, saddr, eaddr, - i_rangeType ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", - i_chip->getHuid(), i_rank.getKey() ); - break; - } - - // Clear all of the counters and maintenance ECC attentions. - o_rc = prepareNextCmd<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "prepareNextCmd(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - // Get the MBA fapi target. 
- fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); - - // Start the background scrub command. - mss_TimeBaseScrub cmd { fapiTrgt, saddr, eaddr, cmdSpeed, - i_stopCond, false }; - errlHndl_t errl = nullptr; - FAPI_INVOKE_HWP( errl, cmd.setupAndExecuteCmd ); - if ( nullptr != errl ) - { - PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", - i_chip->getHuid(), i_rank.getKey() ); - PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); - o_rc = FAIL; break; - } - - } while (0); - - return o_rc; + return __startScrub<TYPE_MBA>( i_chip, i_rank, i_rangeType, i_stopCond, + cmdSpeed ); #undef PRDF_FUNC } diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index 0a504c5ea..52ca3ef46 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -36,6 +36,7 @@ #include <prdfTrace.H> // Platform includes +#include <prdfCenMbaDataBundle.H> #include <prdfMemScrubUtils.H> #include <prdfPlatServices.H> @@ -243,27 +244,85 @@ uint32_t stopBgScrub<TYPE_MBA>( ExtensibleChip * i_chip ) //------------------------------------------------------------------------------ +template<TARGETING::TYPE T> +uint32_t __resumeScrub( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, uint32_t i_stopCond, + mss_MaintCmd::TimeBaseSpeed i_cmdSpeed ); + template<> -uint32_t resumeBgScrub<TYPE_MBA>( ExtensibleChip * i_chip ) +uint32_t __resumeScrub<TYPE_MBA>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + uint32_t i_stopCond, + mss_MaintCmd::TimeBaseSpeed i_cmdSpeed ) { - #define PRDF_FUNC "[PlatServices::resumeBgScrub<TYPE_MBA>] " + #define PRDF_FUNC "[PlatServices::__resumeScrub<TYPE_MBA>] " PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); - uint32_t rc = SUCCESS; + uint32_t o_rc = SUCCESS; - PRDF_ERR( PRDF_FUNC "function not implemented yet" ); + if ( getMbaDataBundle(i_chip)->iv_scrubResumeCounter.atTh() ) + { + // We have resumed 
scrubbing on this rank too many times. We still want + // the scrub to continue to the end of the rank, if possible, but we + // need to prevent flooding. So mask off all the CE/UE stop-on-error + // conditions. Note that there is only one chip mark per rank so we + // don't need to worry about getting flooded with those attentions. + + i_stopCond &= ~mss_MaintCmd::STOP_ON_HARD_NCE_ETE; + i_stopCond &= ~mss_MaintCmd::STOP_ON_INT_NCE_ETE; + i_stopCond &= ~mss_MaintCmd::STOP_ON_SOFT_NCE_ETE; + i_stopCond &= ~mss_MaintCmd::STOP_ON_RETRY_CE_ETE; + i_stopCond &= ~mss_MaintCmd::STOP_ON_UE; + } + + do + { + // TODO: Clear ECC counters/FIRs. Increment the current address. Clear + // FIRs again. Start the command from the current address to the + // end of the rank. - /* TODO: RTC 157888 - Not entirely sure how to do this. Will require a inc - * command followed by a start command. May need the stop conditions - * for the start command. */ + // Resume successful. So increment the resume counter. 
+ getMbaDataBundle(i_chip)->iv_scrubResumeCounter.inc(); - return rc; + } while (0); + + return o_rc; #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +template<> +uint32_t resumeBgScrub<TYPE_MBA>( ExtensibleChip * i_chip ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + /* TODO: + return __resumeScrub<TYPE_MBA>( i_chip, SLAVE_RANK, stopCond, cmdSpeed ); + */ + return SUCCESS; +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t resumeTdScrub<TYPE_MBA>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + uint32_t i_stopCond ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + /* TODO: + return __resumeScrub<TYPE_MBA>( i_chip, i_rangeType, i_stopCond, cmdSpeed ); + */ + return SUCCESS; +} + //############################################################################## //## Line Delete Functions //############################################################################## diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H index 90b24b2c2..df7cf3156 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H @@ -95,6 +95,17 @@ uint32_t stopBgScrub( ExtensibleChip * i_chip ); template<TARGETING::TYPE T> uint32_t resumeBgScrub( ExtensibleChip * i_chip ); +/** + * @brief Resumes TD scrubbing after it has paused on error. + * @param i_chip MBA chip. + * @param i_rangeType See enum AddrRangeType. + * @param i_stopCond The stop conditions for the targeted scrub. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
+ */ +template<TARGETING::TYPE T, typename SCT> +uint32_t resumeTdScrub( ExtensibleChip * i_chip, AddrRangeType i_rangeType, + SCT i_stopCond ); + //############################################################################## //## Line delete functions //############################################################################## |