diff options
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- | src/usr/diag/prdf/common/framework/config/prdfMbaDomain_common.H (renamed from src/usr/diag/prdf/common/framework/config/prdfMbaDomain.H) | 12 | ||||
-rwxr-xr-x | src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C | 49 | ||||
-rwxr-xr-x | src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.H | 59 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C | 111 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H | 37 | ||||
-rw-r--r-- | src/usr/diag/prdf/framework/config/prdfMbaDomain.C | 73 | ||||
-rw-r--r-- | src/usr/diag/prdf/framework/config/prdfMbaDomain.H | 51 | ||||
-rwxr-xr-x | src/usr/diag/prdf/makefile | 2 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C | 229 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H | 16 | ||||
-rwxr-xr-x | src/usr/diag/prdf/prdfMain.C | 69 | ||||
-rw-r--r-- | src/usr/diag/prdf/prdf_hb_only.mk | 16 |
12 files changed, 530 insertions, 194 deletions
diff --git a/src/usr/diag/prdf/common/framework/config/prdfMbaDomain.H b/src/usr/diag/prdf/common/framework/config/prdfMbaDomain_common.H index 6aabd3019..b0a3fc727 100644 --- a/src/usr/diag/prdf/common/framework/config/prdfMbaDomain.H +++ b/src/usr/diag/prdf/common/framework/config/prdfMbaDomain_common.H @@ -1,7 +1,7 @@ /* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ -/* $Source: src/usr/diag/prdf/common/framework/config/prdfMbaDomain.H $ */ +/* $Source: src/usr/diag/prdf/common/framework/config/prdfMbaDomain_common.H $ */ /* */ /* IBM CONFIDENTIAL */ /* */ @@ -21,15 +21,15 @@ /* */ /* IBM_PROLOG_END_TAG */ -#ifndef __PRDFMBADOMAIN_H -#define __PRDFMBADOMAIN_H +#ifndef __prdfMbaDomain_common_H +#define __prdfMbaDomain_common_H #include <prdfRuleChipDomain.H> namespace PRDF { -class MbaDomain : public RuleChipDomain +class MbaDomainCommon : public RuleChipDomain { public: @@ -38,7 +38,7 @@ class MbaDomain : public RuleChipDomain * @param i_did The domain ID * @param i_size The projected size of the domain */ - MbaDomain( DOMAIN_ID i_did, uint32_t i_size = MBA_DOMAIN_SIZE ) : + MbaDomainCommon( DOMAIN_ID i_did, uint32_t i_size = MBA_DOMAIN_SIZE ) : RuleChipDomain( i_did, i_size ) {} @@ -56,5 +56,5 @@ class MbaDomain : public RuleChipDomain } // end namespace PRDF -#endif /* __PRDFMBADOMAIN_H */ +#endif /* __prdfMbaDomain_common_H */ diff --git a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C index f2dc9753a..6ef26e577 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C +++ b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C @@ -746,39 +746,40 @@ mss_MaintCmdWrapper * createMssCmd( mss_MaintCmdWrapper::CmdType i_cmdType, //------------------------------------------------------------------------------ mss_MaintCmdWrapper * createMssCmd( mss_MaintCmdWrapper::CmdType i_cmdType, - TargetHandle_t i_mba, uint32_t i_stopCond, - bool i_isFastSpeed ) + TargetHandle_t i_mba, + const CenRank & i_rank, uint32_t i_stopCond, + uint32_t i_flags ) { mss_MaintCmdWrapper * o_cmd = NULL; - ecmdDataBufferBase sAddr(64), eAddr(64); - int32_t l_rc = getMemAddrRange( i_mba, MSS_ALL_RANKS, sAddr, eAddr ); - if ( SUCCESS == l_rc ) + bool slaveOnly = ( 0 != (i_flags & mss_MaintCmdWrapper::SLAVE_RANK_ONLY) ); + bool allMemory = ( 0 != (i_flags & mss_MaintCmdWrapper::END_OF_MEMORY ) ); + bool fastScrub = ( 0 == (i_flags & mss_MaintCmdWrapper::BG_SCRUB ) ); + + do { - o_cmd = createMssCmd( i_cmdType, i_mba, i_stopCond, i_isFastSpeed, - sAddr, eAddr ); - } - return o_cmd; -} + int32_t l_rc = SUCCESS; -//------------------------------------------------------------------------------ + // Get the address range of i_rank. + ecmdDataBufferBase sAddr(64), eAddr(64); + l_rc = getMemAddrRange( i_mba, i_rank.getMaster(), sAddr, eAddr, + i_rank.getSlave(), slaveOnly ); + if ( SUCCESS != l_rc ) break; -mss_MaintCmdWrapper * createMssCmd( mss_MaintCmdWrapper::CmdType i_cmdType, - TargetHandle_t i_mba, - const CenRank & i_rank, uint32_t i_stopCond, - bool i_isFastSpeed, bool i_slaveOnly ) -{ - mss_MaintCmdWrapper * o_cmd = NULL; + // Get the last address in memory, if needed. + if ( allMemory ) + { + ecmdDataBufferBase junk(64); + l_rc = getMemAddrRange( i_mba, MSS_ALL_RANKS, junk, eAddr ); + if ( SUCCESS != l_rc ) break; + } - ecmdDataBufferBase sAddr(64), eAddr(64); - int32_t l_rc = getMemAddrRange( i_mba, i_rank.getMaster(), sAddr, eAddr, - i_rank.getSlave(), i_slaveOnly ); - if ( SUCCESS == l_rc ) - { - o_cmd = createMssCmd( i_cmdType, i_mba, i_stopCond, i_isFastSpeed, + // Create the command + o_cmd = createMssCmd( i_cmdType, i_mba, i_stopCond, fastScrub, sAddr, eAddr ); - } + + } while (0); return o_cmd; } diff --git a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.H b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.H index fee317cca..e8bc0fd95 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.H +++ b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.H @@ -344,6 +344,25 @@ class mss_MaintCmdWrapper SUPERFAST_READ, }; + /** Input flags to control how, and on what, the command runs. **/ + enum CtrlFlags + { + /** Run all defaults. See each individual flag of default behavior */ + NO_FLAGS = 0x00, + + /** When set, the command will run from the beginning of the given rank + * to the end of memory. Default is to run to the end of the rank. */ + END_OF_MEMORY = 0x01, + + /** When set, the command will run once every 12/24 hours. Default is to + * run as fast as possible. */ + BG_SCRUB = 0x02, + + /** When set, the command will run on the slave rank only. Default is to + * run on the entire master rank. */ + SLAVE_RANK_ONLY = 0x04, + }; + public: // functions /** @@ -388,41 +407,21 @@ class mss_MaintCmdWrapper }; // class mss_MaintCmdWrapper /** - * @brief Create a maintenance command object that will run on all memory - * behind this MBA. - * @param i_cmdType Maintenance command type which we want to create. - * @param i_mba An MBA target. - * @param i_stopCond Bit mask for conditions in which to stop command. - * @param i_isFastSpeed false = slow (12 H), true = fast (default). - * @return A mss_MaintCmdWrapper object, NULL if an internal function failed. - * @note This function allocates memory on heap for mss_MaintCmdWrapper - * object. It is the caller's responsibilty to delete this object. - * @note i_isFastSpeed will be ignored for super fast commands. - */ -mss_MaintCmdWrapper * createMssCmd( mss_MaintCmdWrapper::CmdType i_cmdType, - TARGETING::TargetHandle_t i_mba, - uint32_t i_stopCond, - bool i_isFastSpeed = true ); - -/** - * @brief Create a maintenance command object that will run on all memory - * behind the given rank. - * @param i_cmdType Maintenance command type which we want to create. - * @param i_rank The target rank. - * @param i_mba An MBA target. - * @param i_stopCond Bit mask for conditions in which to stop command. - * @param i_isFastSpeed false = slow (12 H), true = fast (default). - * @param i_slaveOnly true = slave rank only, false = master rank (default). + * @brief Create a maintenance command object. + * @param i_cmdType Maintenance command type which we want to create. + * @param i_mba An MBA target. + * @param i_rank The first rank to start with (see enum CtrlFlags for + * more details). + * @param i_stopCond Bit mask for conditions in which to stop command. + * @param i_flags See enum CtrlFlags for details. * @return A mss_MaintCmdWrapper object, NULL if an internal function failed. * @note This function allocates memory on heap for mss_MaintCmdWrapper * object. It is the caller's responsibilty to delete this object. - * @note i_isFastSpeed will be ignored for super fast commands. */ mss_MaintCmdWrapper * createMssCmd( mss_MaintCmdWrapper::CmdType i_cmdType, - TARGETING::TargetHandle_t i_mba, - const CenRank & i_rank, uint32_t i_stopCond, - bool i_isFastSpeed = true, - bool i_slaveOnly = false ); + TARGETING::TargetHandle_t i_mba, + const CenRank & i_rank, uint32_t i_stopCond, + uint32_t i_flags = mss_MaintCmdWrapper::NO_FLAGS ); } // end namespace PlatServices diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C index b6bc34e73..680a5f6c5 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C @@ -94,16 +94,6 @@ int32_t CenMbaTdCtlrCommon::prepareNextCmd() do { - CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); - ExtensibleChip * membChip = mbadb->getMembChip(); - if ( NULL == membChip ) - { - PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); - o_rc = FAIL; break; - } - - uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() ); - //---------------------------------------------------------------------- // Clean up previous command //---------------------------------------------------------------------- @@ -119,8 +109,8 @@ int32_t CenMbaTdCtlrCommon::prepareNextCmd() // Clear ECC counters //---------------------------------------------------------------------- - const char * reg_str = ( 0 == mbaPos ) ? "MBA0_MBSTR" : "MBA1_MBSTR"; - SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str ); + const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSTR" : "MBA1_MBSTR"; + SCAN_COMM_REGISTER_CLASS * mbstr = iv_membChip->getRegister( reg_str ); o_rc = mbstr->Read(); if ( SUCCESS != o_rc ) { @@ -141,18 +131,24 @@ int32_t CenMbaTdCtlrCommon::prepareNextCmd() // the register cache to avoid clearing the counters again with a write // from the out-of-date cached copy. RegDataCache & cache = RegDataCache::getCachedRegisters(); - cache.flush( membChip, mbstr ); + cache.flush( iv_membChip, mbstr ); //---------------------------------------------------------------------- // Clear ECC FIRs //---------------------------------------------------------------------- - reg_str = ( 0 == mbaPos ) ? "MBA0_MBSECCFIR_AND" : "MBA1_MBSECCFIR_AND"; - SCAN_COMM_REGISTER_CLASS * firand = membChip->getRegister( reg_str ); + reg_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR_AND" + : "MBA1_MBSECCFIR_AND"; + SCAN_COMM_REGISTER_CLASS * firand = iv_membChip->getRegister( reg_str ); firand->setAllBits(); - // Clear MPE bit for this rank. - firand->ClearBit( 20 + iv_rank.getMaster() ); + // Clear all MPE bits. + // This will need to be done when starting a TD procedure or background + // scrubbing. iv_rank may not be set when starting background scrubbing + // and technically there should only be one of these MPE bits on at a + // time so we should not have to worry about losing an attention by + // clearing them all. + firand->SetBitFieldJustified( 20, 8, 0 ); // Clear NCE, SCE, MCE, RCE, SUE, UE bits (36-41) firand->SetBitFieldJustified( 36, 6, 0 ); @@ -168,7 +164,7 @@ int32_t CenMbaTdCtlrCommon::prepareNextCmd() iv_mbaChip->getRegister("MBASPA_AND"); spaAnd->setAllBits(); - // clear threshold exceeded attentions + // Clear threshold exceeded attentions spaAnd->SetBitFieldJustified( 1, 4, 0 ); o_rc = spaAnd->Write(); @@ -226,23 +222,12 @@ int32_t CenMbaTdCtlrCommon::checkEccErrors( uint16_t & o_eccErrorMask ) o_eccErrorMask = NO_ERROR; - TargetHandle_t mba = iv_mbaChip->GetChipHandle(); - do { - CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); - ExtensibleChip * membChip = mbadb->getMembChip(); - if ( NULL == membChip ) - { - PRDF_ERR( PRDF_FUNC"getMembChip() failed: MBA=0x%08x", - getHuid(mba) ); - o_rc = FAIL; break; - } - - const char * reg_str = ( 0 == getTargetPosition(mba) ) - ? "MBA0_MBSECCFIR" : "MBA1_MBSECCFIR"; - SCAN_COMM_REGISTER_CLASS * mbsEccFir = membChip->getRegister( reg_str ); - + const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR" + : "MBA1_MBSECCFIR"; + SCAN_COMM_REGISTER_CLASS * mbsEccFir + = iv_membChip->getRegister( reg_str ); o_rc = mbsEccFir->Read(); if ( SUCCESS != o_rc ) { @@ -307,8 +292,6 @@ int32_t CenMbaTdCtlrCommon::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ) io_sc.service_data->SetErrorSig( PRDFSIG_VcmVerified ); - CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); - if ( areDramRepairsDisabled() ) { iv_tdState = NO_OP; // The TD procedure is complete. @@ -524,6 +507,60 @@ int32_t CenMbaTdCtlrCommon::handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +int32_t CenMbaTdCtlrCommon::setRtEteThresholds() +{ + #define PRDF_FUNC "[CenMbaTdCtlrCommon::setRtEteThresholds] " + + int32_t o_rc = SUCCESS; + + do + { + const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSTR" : "MBA1_MBSTR"; + SCAN_COMM_REGISTER_CLASS * mbstr = iv_membChip->getRegister( reg_str ); + o_rc = mbstr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str ); + break; + } + + // TODO: RTC 88720 The soft and intermittent CE thresholds will be + // calculated based on the per DRAM threshold similar to the IPL + // CE analysis. + uint32_t softIntCe = 1; + + // Only care about retry CEs if there are a lot of them. So the + // threshold will be high in the field. However, in MNFG the retry CEs + // will be handled differently by putting every occurrence in the RCE + // table and doing targeted diagnostics when needed. + uint32_t retryCe = mfgMode() ? 1 : 2047; + + uint32_t hardCe = 1; // Always stop on first occurrence. + + mbstr->SetBitFieldJustified( 4, 12, softIntCe ); + mbstr->SetBitFieldJustified( 16, 12, softIntCe ); + mbstr->SetBitFieldJustified( 28, 12, hardCe ); + mbstr->SetBitFieldJustified( 40, 12, retryCe ); + + // Set the per symbol counters to count soft, intermittent, and hard CEs + mbstr->SetBitFieldJustified( 55, 3, 0x7 ); + + o_rc = mbstr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + void CenMbaTdCtlrCommon::badPathErrorHandling( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[CenMbaTdCtlrCommon::badPathErrorHandling] " @@ -538,10 +575,6 @@ void CenMbaTdCtlrCommon::badPathErrorHandling( STEP_CODE_DATA_STRUCT & io_sc ) iv_tdState = NO_OP; - int32_t l_rc = cleanupPrevCmd(); // Just in case. - if ( SUCCESS != l_rc ) - PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); - io_sc.service_data->SetErrorSig( PRDFSIG_MaintCmdComplete_ERROR ); io_sc.service_data->SetServiceCall(); diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H index 50af6b034..3d5635ea8 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H @@ -95,6 +95,24 @@ class CenMbaTdCtlrCommon RETRY_CTE = 0x40, ///< Retry CE threshold exceeed }; + // Common stop conditions + enum StopConditions + { + COND_TARGETED_CMD = + mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION, + + COND_BG_SCRUB = + mss_MaintCmd::STOP_ON_HARD_NCE_ETE | + mss_MaintCmd::STOP_ON_INT_NCE_ETE | + mss_MaintCmd::STOP_ON_SOFT_NCE_ETE | + mss_MaintCmd::STOP_ON_RETRY_CE_ETE | + mss_MaintCmd::STOP_ON_MPE | + mss_MaintCmd::STOP_ON_UE | + mss_MaintCmd::STOP_IMMEDIATE | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION, + }; + public: // functions /** @@ -283,6 +301,7 @@ class CenMbaTdCtlrCommon /** * @brief Preforms cleanup tasks that need to be done before starting the * next maintenance command (i.e. clear scrub counter). + * @note Will call cleanupPrevCmd() as part of the preparations. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ virtual int32_t prepareNextCmd(); @@ -320,6 +339,16 @@ class CenMbaTdCtlrCommon virtual int32_t handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ); /** + * @brief Will set the threshold for all runtime ETE attentions in + * hardware. + * @note This only sets the runtime thresholds but is a common function + * because these thresholds will need to be set before starting the + * initial fast scrub at the end of Hostboot. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + virtual int32_t setRtEteThresholds(); + + /** * @brief This class is designed such that all functions will eventually * return any bad error code to the top level public functions such * as handleCmdCompleteEvent() and handleTdEvent(). This is a common @@ -331,9 +360,15 @@ class CenMbaTdCtlrCommon protected: // instance variables - /** The memory controller chip that this TD controller acts on. */ + /** The MBA chip that this TD controller acts on. */ ExtensibleChip * iv_mbaChip; + /** The MEMBUF chip connected iv_mbaChip. */ + ExtensibleChip * iv_membChip; + + /** The position number (0-1) relative to the connected MEMBUF. */ + uint32_t iv_mbaPos; + /** Indicates if TD controller is initialized. */ bool iv_initialized; diff --git a/src/usr/diag/prdf/framework/config/prdfMbaDomain.C b/src/usr/diag/prdf/framework/config/prdfMbaDomain.C new file mode 100644 index 000000000..fc4d12153 --- /dev/null +++ b/src/usr/diag/prdf/framework/config/prdfMbaDomain.C @@ -0,0 +1,73 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/framework/config/prdfMbaDomain.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#include <prdfMbaDomain.H> + +// Framework includes +#include <prdfExtensibleChip.H> +#include <prdfPlatServices.H> +#include <prdfTrace.H> + +// Pegasus includes +#include <prdfCenMbaDataBundle.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +int32_t MbaDomain::startScrub() +{ + #define PRDF_FUNC "[MbaDomain::startScrub] " + + int32_t o_rc = SUCCESS; + + do + { + // Iterate all MBAs in the domain. + for ( uint32_t i = 0; i < GetSize(); ++i ) + { + RuleChip * mbaChip = LookUp(i); + + // Start background scrub + CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip ); + int32_t l_rc = mbadb->iv_tdCtlr.startInitialBgScrub(); + if ( SUCCESS != l_rc ) + { + PRDF_ERR( PRDF_FUNC"startInitialBgScrub() failed: MBA=0x%08x", + mbaChip->GetId() ); + o_rc = FAIL; continue; // Keep going. + } + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/framework/config/prdfMbaDomain.H b/src/usr/diag/prdf/framework/config/prdfMbaDomain.H new file mode 100644 index 000000000..6d8bed1ad --- /dev/null +++ b/src/usr/diag/prdf/framework/config/prdfMbaDomain.H @@ -0,0 +1,51 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/framework/config/prdfMbaDomain.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2012,2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __prdfMbaDomain_H +#define __prdfMbaDomain_H + +#include <prdfMbaDomain_common.H> + +namespace PRDF +{ + +class MbaDomain : public MbaDomainCommon +{ + public: + + MbaDomain( DOMAIN_ID i_did, uint32_t i_size = MBA_DOMAIN_SIZE ) : + MbaDomainCommon( i_did, i_size ) + {} + + /** + * @brief Starts memory background scrubbing for each MBA in the node. + * @param Non-SUCCESS if an internal function failed, SUCCESS otherwise. + */ + int32_t startScrub(); + +}; + +} // end namespace PRDF + +#endif /* __prdfMbaDomain_H */ + diff --git a/src/usr/diag/prdf/makefile b/src/usr/diag/prdf/makefile index 398facc45..f71992eb0 100755 --- a/src/usr/diag/prdf/makefile +++ b/src/usr/diag/prdf/makefile @@ -47,6 +47,7 @@ VPATH = \ common/mnfgtools \ mnfgtools \ framework \ + framework/config \ framework/rule \ framework/service \ plat \ @@ -78,6 +79,7 @@ EXTRAINCDIR += ${PRD_USR_PATH}/common/plat/pegasus EXTRAINCDIR += ${PRD_USR_PATH}/common/mnfgtools EXTRAINCDIR += ${PRD_USR_PATH}/mnfgtools EXTRAINCDIR += ${PRD_USR_PATH}/framework/service +EXTRAINCDIR += ${PRD_USR_PATH}/framework/config EXTRAINCDIR += ${PRD_USR_PATH}/plat/pegasus # Conditionally enables profiling of flyweight register and resolution object diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C index 569a07fd2..e65387ee3 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C @@ -119,12 +119,32 @@ int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) // Do some cleanup if the TD procedure is complete. if ( !isInTdMode() ) { - o_rc = exitTdSequence(); + // Clean up the previous command + // PRD is not starting another command but MDIA might be so clear + // the counters and FIRs as well. + o_rc = prepareNextCmd(); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC"exitTdSequence() failed" ); + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); break; } + + // Inform MDIA about command complete + // Note that we only want to send the command complete message if + // everything above is successful because a bad return code will + // result in a SKIP_MBA message sent. There is no need to send + // redundant messages. + o_rc = signalMdiaCmdComplete(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"signalMdiaCmdComplete() failed" ); + break; + } + + // Clear out the mark, just in case. This is so we don't + // accidentally callout this mark on another rank in an error path + // scenario. + iv_mark = CenMark(); } } while(0); @@ -134,8 +154,12 @@ int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) PRDF_ERR( PRDF_FUNC"Failed." ); badPathErrorHandling( io_sc ); + int32_t l_rc = cleanupPrevCmd(); // Just in case. + if ( SUCCESS != l_rc ) + PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); + // Tell MDIA to skip further analysis on this MBA. - int32_t l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA ); + l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA ); if ( SUCCESS != l_rc ) PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(SKIP_MBA) failed" ); } @@ -167,6 +191,98 @@ int32_t CenMbaTdCtlr::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc, } //------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::startInitialBgScrub() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startInitialBgScrub] " + + int32_t o_rc = SUCCESS; + + iv_tdState = NO_OP; + + // NOTE: It is possible for a chip mark to have been placed between MDIA + // and the initial start scrub. Those unverified chip marks will be + // found in the runtime TD controller's initialize() function. The + // chip marks will then be verified after the initial fast scrub is + // complete. + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + // Should have been initialized during MDIA. If not, there is a serious + // logic issue. + if ( !iv_initialized ) + { + PRDF_ERR( PRDF_FUNC"TD controller not initialized." ); + break; + } + + // Cleanup hardware before starting the maintenance command. This will + // clear the ECC counters, which must be done before setting the ETE + // thresholds. + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Set the default thresholds for all ETE attentions. + o_rc = setRtEteThresholds(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setRtEteThresholds() failed" ); + break; + } + + // Need the first rank in memory. + CenAddr startAddr, junk; + o_rc = getMemAddrRange( mba, startAddr, junk ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"getMemAddrRange() failed" ); + break; + } + + // Start the initial fast scrub. + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, + mba, startAddr.getRank(), COND_TARGETED_CMD, + mss_MaintCmdWrapper::END_OF_MEMORY ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed" ); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while (0); + + if ( SUCCESS != o_rc ) + { + // Can't use badPathErrorHandling() because there is no SDC created when + // this function is called. + + PRDF_ERR( PRDF_FUNC"iv_mbaChip:0x%08x iv_initialized:%c", + iv_mbaChip->GetId(), iv_initialized ? 'T' : 'F' ); + + int32_t l_rc = cleanupPrevCmd(); // Just in case. + if ( SUCCESS != l_rc ) + PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ // Private Functions //------------------------------------------------------------------------------ @@ -189,6 +305,23 @@ int32_t CenMbaTdCtlr::initialize() o_rc = FAIL; break; } + // Set iv_membChip. + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + iv_membChip = mbadb->getMembChip(); + if ( NULL == iv_membChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); + o_rc = FAIL; break; + } + + // Set iv_mbaPos. + iv_mbaPos = getTargetPosition( mba ); + if ( MAX_MBA_PER_MEMBUF <= iv_mbaPos ) + { + PRDF_ERR( PRDF_FUNC"iv_mbaPos=%d is invalid", iv_mbaPos ); + o_rc = FAIL; break; + } + iv_initialized = true; } while (0); @@ -718,11 +851,8 @@ int32_t CenMbaTdCtlr::startVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, - mba, iv_rank, stopCond ); + mba, iv_rank, COND_TARGETED_CMD ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -766,11 +896,8 @@ int32_t CenMbaTdCtlr::startVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, - mba, iv_rank, stopCond ); + mba, iv_rank, COND_TARGETED_CMD ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -822,11 +949,8 @@ int32_t CenMbaTdCtlr::startDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, - mba, iv_rank, stopCond ); + mba, iv_rank, COND_TARGETED_CMD ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -870,11 +994,8 @@ int32_t CenMbaTdCtlr::startDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, - mba, iv_rank, stopCond ); + mba, iv_rank, COND_TARGETED_CMD ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -927,11 +1048,9 @@ int32_t CenMbaTdCtlr::startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, - mba, iv_rank, stopCond, true, true ); + mba, iv_rank, COND_TARGETED_CMD, + mss_MaintCmdWrapper::SLAVE_RANK_ONLY ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -984,11 +1103,9 @@ int32_t CenMbaTdCtlr::startTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | - mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); - iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, - mba, iv_rank, stopCond, true, true ); + mba, iv_rank, COND_TARGETED_CMD, + mss_MaintCmdWrapper::SLAVE_RANK_ONLY ); if ( NULL == iv_mssCmd ) { PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); @@ -1161,48 +1278,6 @@ int32_t CenMbaTdCtlr::handleMPE( STEP_CODE_DATA_STRUCT & io_sc ) #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - -int32_t CenMbaTdCtlr::exitTdSequence() -{ - #define PRDF_FUNC "[CenMbaTdCtlr::exitTdSequence] " - - int32_t o_rc = SUCCESS; - - do - { - // Clean up the previous command - // PRD is not starting another command but MDIA might be so clear the - // counters and FIRs as well. - o_rc = prepareNextCmd(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); - break; - } - - // Inform MDIA about command complete - // Note that we only want to send the command complete message if - // everything above is successful because a bad return code will result - // in a SKIP_MBA message sent. There is no need to send redundant - // messages. - o_rc = signalMdiaCmdComplete(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC"signalMdiaCmdComplete() failed" ); - break; - } - - // Clear out the mark, just in case. This is so we don't accidentally - // callout this mark on another rank in an error path scenario. - iv_mark = CenMark(); - - } while (0); - - return o_rc; - - #undef PRDF_FUNC -} //------------------------------------------------------------------------------ @@ -1250,6 +1325,8 @@ int32_t CenMbaTdCtlr::signalMdiaCmdComplete() #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + // Do the setup for mnfg IPL CE int32_t CenMbaTdCtlr::mnfgCeSetup() { @@ -1259,18 +1336,8 @@ int32_t CenMbaTdCtlr::mnfgCeSetup() do { - CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); - ExtensibleChip * membChip = mbadb->getMembChip(); - if ( NULL == membChip ) - { - PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); - o_rc = FAIL; break; - } - - uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() ); - - const char * reg_str = ( 0 == mbaPos ) ? "MBA0_MBSTR" : "MBA1_MBSTR"; - SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str ); + const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSTR" : "MBA1_MBSTR"; + SCAN_COMM_REGISTER_CLASS * mbstr = iv_membChip->getRegister( reg_str ); o_rc = mbstr->Read(); if ( SUCCESS != o_rc ) { diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H index 0fdb3e47b..f314f0fe9 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H @@ -84,6 +84,15 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ); int32_t startTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ); + public: // functions + + /** + * @brief Start the initial background scrub. This is done at the very end + * of Hostboot after the last call to checkForIplAttns(). + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t startInitialBgScrub(); + private: // functions /** @@ -102,13 +111,6 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t handleMPE( STEP_CODE_DATA_STRUCT & io_sc ); /** - * @brief Handle cleanup when TD sequence is complete and TD state machine - * will reset. - * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. - */ - int32_t exitTdSequence(); - - /** * @brief Sends a message to MDIA that a maintenance command has completed. * @note If for some reason PRD needed to do some targeted diagnotics and * on a rank that was not the last rank behind the MBA, this diff --git a/src/usr/diag/prdf/prdfMain.C b/src/usr/diag/prdf/prdfMain.C index 79806b071..5adde629b 100755 --- a/src/usr/diag/prdf/prdfMain.C +++ b/src/usr/diag/prdf/prdfMain.C @@ -34,6 +34,7 @@ #include <prdfExtensibleChip.H> #include <prdfErrlUtil.H> #include <prdfPlatServices.H> +#include <prdfMbaDomain.H> using namespace TARGETING; using namespace HWAS; @@ -103,4 +104,72 @@ int32_t analyzeIplCEStats( TargetHandle_t i_mba, bool &o_calloutMade ) #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +errlHndl_t startScrub() +{ + #define PRDF_FUNC "[PRDF::startScrub] " + + errlHndl_t o_errl = NULL; + + int32_t l_rc = SUCCESS; + HUID nodeId = INVALID_HUID; + + do + { + if ( isMemoryPreservingIpl() ) + { + PRDF_INF( PRDF_FUNC"Not supported during MP-IPLs." ); + break; + } + + // This is run in Hostboot so there should only be one node. + TargetHandleList list = getFunctionalTargetList( TYPE_NODE ); + if ( 1 != list.size() ) + { + PRDF_ERR( PRDF_FUNC"getFunctionalTargetList(TYPE_NODE) failed" ); + l_rc = FAIL; break; + } + nodeId = getHuid(list[0]); + + PRDF_ENTER( PRDF_FUNC"HUID=0x%08x", nodeId ); + + // Start scrubbing on all MBAs. + MbaDomain * domain = (MbaDomain *)systemPtr->GetDomain(MBA_DOMAIN); + if ( NULL == domain ) + { + PRDF_ERR( PRDF_FUNC"MBA_DOMAIN not found. nodeId=0x%08x", nodeId ); + l_rc = FAIL; break; + } + l_rc = domain->startScrub(); + + PRDF_EXIT( PRDF_FUNC"HUID=0x%08x", nodeId ); + + } while (0); + + if ( SUCCESS != l_rc ) + { + // Get user data + uint64_t ud12 = PRDF_GET_UINT64_FROM_UINT32( nodeId, __LINE__ ); + uint64_t ud34 = PRDF_GET_UINT64_FROM_UINT32( 0, 0 ); + + // Create error log + o_errl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity + PRDF_START_SCRUB, // module ID + PRDF_DETECTED_FAIL_SOFTWARE, // reason code + ud12, ud34 ); // user data 1-4 + + // Add 2nd level support + o_errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH ); + + // Add traces + o_errl->collectTrace( PRDF_COMP_NAME, 512 ); + } + + return o_errl; + + #undef PRDF_FUNC +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk index ddb9c8543..199e8ca70 100644 --- a/src/usr/diag/prdf/prdf_hb_only.mk +++ b/src/usr/diag/prdf/prdf_hb_only.mk @@ -32,15 +32,19 @@ PRDF_RULE_PLUGINS_PEGASUS_HB = \ # PRD object files (Hostboot only). ################################################################################ -prd_pegasus_specific_HB = \ - prdfCenMbaIplCeStats.o \ - prdfDramRepairs.o \ - prdfPlatCalloutUtil.o +prd_config_HB = \ + prdfMbaDomain.o \ prd_mnfgtools_HB = \ prdfMfgThresholdFile.o \ prdfMfgThresholdSync.o +prd_plat_HB = \ + prdfCenMbaIplCeStats.o \ + prdfDramRepairs.o \ + prdfPlatCalloutUtil.o + prd_object_files_HB = \ - ${prd_pegasus_specific_HB} \ - ${prd_mnfgtools_HB} + ${prd_config_HB} \ + ${prd_mnfgtools_HB} \ + ${prd_plat_HB} |