diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-08-07 16:32:46 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-08-15 22:04:47 -0500 |
commit | 4f0f9f1534a110cbd369e7ee3f57ce7cfd6719b8 (patch) | |
tree | 8642fc1b8625ff9aa38e203787591796b07996af /src/usr/diag/prdf | |
parent | 04712b91e355a07c3b354c3632f7efc6fe8284ef (diff) | |
download | talos-hostboot-4f0f9f1534a110cbd369e7ee3f57ce7cfd6719b8.tar.gz talos-hostboot-4f0f9f1534a110cbd369e7ee3f57ce7cfd6719b8.zip |
PRD: resume super fast read support for Row Repair
Change-Id: I7735becb3a6b8caf5d52d3ce9be7e6b1f50b4f21
RTC: 196073
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/64072
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/64556
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 98 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.H | 9 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.C | 163 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.H | 23 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.C | 78 |
5 files changed, 300 insertions, 71 deletions
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index ad742e30d..96729796b 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -37,6 +37,7 @@ #include <prdfErrlUtil.H> #include <prdfTrace.H> #include <prdfAssert.h> +#include <prdfRegisterCache.H> #include <prdfCenMbaDataBundle.H> #include <prdfMemScrubUtils.H> @@ -927,6 +928,103 @@ uint32_t startTdScrub<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +template<> +uint32_t incMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, + MemAddr & o_addr ) +{ + #define PRDF_FUNC "[PlatServices::incMaintAddr<TYPE_MBA>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); + errlHndl_t errl = nullptr; + + do + { + // Manually clear the CE counters based on the error type and clear the + // maintenance FIRs. Note that we only want to clear counters that are + // at attention to allow the other CE types the opportunity to reach + // threshold, if possible. + o_rc = conditionallyClearEccCounters<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "conditionallyClearEccCounters(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + o_rc = clearEccFirs<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + // Increment the current maintenance address. + mss_IncrementAddress incCmd { fapiTrgt }; + FAPI_INVOKE_HWP( errl, incCmd.setupAndExecuteCmd ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "mss_IncrementAddress setupAndExecuteCmd() on " + "0x%08x failed", i_chip->getHuid() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + break; + } + + // Clear the maintenance FIRs again. This time do not clear the CE + // counters. + o_rc = clearEccFirs<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + // The address register has been updated so we need to clear our cache + // to ensure we can do a new read. + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MBMACA" ); + RegDataCache::getCachedRegisters().flush( i_chip, reg ); + + // Read the new start address from hardware. + o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, o_addr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //############################################################################## //## Core/cache trace array functions //############################################################################## diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.H b/src/usr/diag/prdf/plat/prdfPlatServices.H index bd20bd239..28dd3d536 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices.H @@ -186,6 +186,15 @@ template<TARGETING::TYPE TT, typename SCT> uint32_t startTdScrub( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_rangeType, SCT i_stopCond ); +/** + * @brief Increment the current maintenance address. + * @param i_chip MBA chip. + * @param o_newAddr The new address from the maintenance address register. + * @return Non-SUCCESS if an internal function fails, otherwise SUCCESS. + */ +template<TARGETING::TYPE T> +uint32_t incMaintAddr( ExtensibleChip * i_chip, MemAddr & o_newAddr ); + //############################################################################## //## Core/cache trace array functions //############################################################################## diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C index d3c0729a9..e8a22e1af 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C @@ -558,6 +558,83 @@ uint32_t startTdSfRead<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> +uint32_t resumeTdSfRead<TYPE_MBA>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + uint32_t i_stopCond ) +{ + #define PRDF_FUNC "[PlatServices::startTdSfRead<TYPE_MBA>] " + + PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Make sure there is a command complete attention when the command stops. + i_stopCond |= mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION; + + // Make sure the command stops on the end address if there are no errors. + i_stopCond |= mss_MaintCmd::STOP_ON_END_ADDRESS; + + fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); + errlHndl_t errl = nullptr; + + do + { + // Increment the address that is currently in hardware. Note that the CE + // counters will be conditionally cleared. Also, all of the appropriate + // attentions will be cleared as well. + MemAddr memSaddr; + o_rc = incMaintAddr<TYPE_MBA>( i_chip, memSaddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "incMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + break; + } + fapi2::buffer<uint64_t> saddr = memSaddr.toMaintAddr<TYPE_MBA>(); + + // Get the address range of the given rank. + fapi2::buffer<uint64_t> junk, eaddr; + MemRank rank = memSaddr.getRank(); + o_rc = getMemAddrRange<TYPE_MBA>( i_chip, rank, junk, eaddr, + i_rangeType ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + + // Create the new command. Store a pointer to the command in the MBA + // data bundle so that we can call the cleanup function after the + // command has completed. + MbaDataBundle * db = getMbaDataBundle( i_chip ); + PRDF_ASSERT( nullptr == db->iv_sfCmd ); // Code bug. + db->iv_sfCmd = new mss_SuperFastRead { fapiTrgt, saddr, eaddr, + i_stopCond, false }; + + // Start the super fast read command. + FAPI_INVOKE_HWP( errl, db->iv_sfCmd->setupAndExecuteCmd ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", + i_chip->getHuid(), rank.getKey() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t startTdSteerCleanup<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_rangeType, @@ -616,7 +693,7 @@ uint32_t startTdSteerCleanup<TYPE_MBA>( ExtensibleChip * i_chip, // Get the MBA fapi target. fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); - // Start the background scrub command. + // Start the steer cleanup command. mss_TimeBaseSteerCleanup cmd { fapiTrgt, saddr, eaddr, cmdSpeed, i_stopCond, false }; errlHndl_t errl = nullptr; @@ -638,6 +715,90 @@ uint32_t startTdSteerCleanup<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ +template<> +uint32_t resumeTdSteerCleanup<TYPE_MBA>( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + uint32_t i_stopCond ) +{ + #define PRDF_FUNC "[PlatServices::startTdSteerCleanup<TYPE_MBA>] " + + PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Make sure there is a command complete attention when the command stops. + i_stopCond |= mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION; + + // Make sure the command stops on the end address if there are no errors. + i_stopCond |= mss_MaintCmd::STOP_ON_END_ADDRESS; + + // Default speed is to run as fast as possible. + mss_MaintCmd::TimeBaseSpeed cmdSpeed = mss_MaintCmd::FAST_MAX_BW_IMPACT; + + // IUEs (reported via RCE ETE) are reported as UEs during read operations. + // Therefore, we will treat IUEs like UEs for scrub operations simply to + // maintain consistency during all of Memory Diagnostics. Note that since we + // set the stop on RCE ETE flag, this requires a threshold in the MBSTR. + // Fortunately, MDIA sets the threshold to 1 when it starts the first + // command on this MBA and that threshold should never change throughout all + // of Memory Diagnostics. + + i_stopCond |= mss_MaintCmd::STOP_ON_RETRY_CE_ETE; + + fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() ); + errlHndl_t errl = nullptr; + + do + { + // Increment the address that is currently in hardware. Note that the CE + // counters will be conditionally cleared. Also, all of the appropriate + // attentions will be cleared as well. + MemAddr newAddr; + o_rc = incMaintAddr<TYPE_MBA>( i_chip, newAddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "incMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + break; + } + fapi2::buffer<uint64_t> saddr = newAddr.toMaintAddr<TYPE_MBA>(); + + // Get the address range of the given rank. + fapi2::buffer<uint64_t> junk, eaddr; + MemRank rank = newAddr.getRank(); + o_rc = getMemAddrRange<TYPE_MBA>( i_chip, rank, junk, eaddr, + i_rangeType ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + + // Start the steer cleanup command. + mss_TimeBaseSteerCleanup cmd { fapiTrgt, saddr, eaddr, cmdSpeed, + i_stopCond, false }; + FAPI_INVOKE_HWP( errl, cmd.setupAndExecuteCmd ); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed", + i_chip->getHuid(), rank.getKey() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + } // end namespace PlatServices } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H index 6bf884795..90724f3a3 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H @@ -145,6 +145,17 @@ uint32_t startTdSfRead( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_rangeType, VT i_stopCond ); /** + * @brief Resumes TD super fast read after it has paused on error. + * @param i_chip MBA chip. + * @param i_rangeType See enum AddrRangeType. + * @param i_stopCond The stop conditions for the targeted scrub. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +template<TARGETING::TYPE T, typename SCT> +uint32_t resumeTdSfRead( ExtensibleChip * i_chip, AddrRangeType i_rangeType, + SCT i_stopCond ); + +/** * @brief Starts a steer cleanup scrub command on the target rank. * @param i_chip MBA chip. * @param i_rank Target rank. @@ -156,6 +167,18 @@ template<TARGETING::TYPE TT, typename VT> uint32_t startTdSteerCleanup( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_rangeType, VT i_stopCond ); +/** + * @brief Resumes TD steer cleanup after it has paused on error. + * @param i_chip MBA chip. + * @param i_rangeType See enum AddrRangeType. + * @param i_stopCond The stop conditions for the targeted scrub. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +template<TARGETING::TYPE T, typename SCT> +uint32_t resumeTdSteerCleanup( ExtensibleChip * i_chip, + AddrRangeType i_rangeType, + SCT i_stopCond ); + } // end namespace PlatServices } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index eb2db3e78..e04eff9c5 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -34,7 +34,6 @@ // Framework includes #include <prdfErrlUtil.H> #include <prdfTrace.H> -#include <prdfRegisterCache.H> // Platform includes #include <prdfCenMbaDataBundle.H> @@ -290,83 +289,22 @@ uint32_t __resumeScrub<TYPE_MBA>( ExtensibleChip * i_chip, do { - // Manually clear the CE counters based on the error type and clear the - // maintenance FIRs. Note that we only want to clear counters that are - // at attention to allow the other CE types the opportunity to reach - // threshold, if possible. - o_rc = conditionallyClearEccCounters<TYPE_MBA>( i_chip ); + // Increment the address that is currently in hardware. Note that the CE + // counters will be conditionally cleared. Also, all of the appropriate + // attentions will be cleared as well. + MemAddr memSaddr; + o_rc = incMaintAddr<TYPE_MBA>( i_chip, memSaddr ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "conditionallyClearEccCounters(0x%08x) failed", + PRDF_ERR( PRDF_FUNC "incMaintAddr(0x%08x) failed", i_chip->getHuid() ); break; } - - o_rc = clearEccFirs<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - // Increment the current maintenance address. - mss_IncrementAddress incCmd { fapiTrgt }; - FAPI_INVOKE_HWP( errl, incCmd.setupAndExecuteCmd ); - if ( nullptr != errl ) - { - PRDF_ERR( PRDF_FUNC "mss_IncrementAddress setupAndExecuteCmd() on " - "0x%08x failed", i_chip->getHuid() ); - PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); - o_rc = FAIL; - break; - } - - // Clear the maintenance FIRs again. This time do not clear the CE - // counters. - o_rc = clearEccFirs<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed", - i_chip->getHuid() ); - break; - } - - // The address register has been updated so we need to clear our cache - // to ensure we can do a new read. - SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MBMACA" ); - RegDataCache::getCachedRegisters().flush( i_chip, reg ); - - // Read the new start address from hardware. - MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, addr ); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", - i_chip->getHuid() ); - break; - } - fapi2::buffer<uint64_t> saddr = addr.toMaintAddr<TYPE_MBA>(); + fapi2::buffer<uint64_t> saddr = memSaddr.toMaintAddr<TYPE_MBA>(); // Get the end address of the current rank. fapi2::buffer<uint64_t> eaddr, junk; - MemRank rank = addr.getRank(); + MemRank rank = memSaddr.getRank(); o_rc = getMemAddrRange<TYPE_MBA>( i_chip, rank, junk, eaddr, i_rangeType ); if ( SUCCESS != o_rc ) |