diff options
Diffstat (limited to 'src/usr/diag')
24 files changed, 1969 insertions, 81 deletions
diff --git a/src/usr/diag/prdf/common/framework/service/prdfTargetServices.C b/src/usr/diag/prdf/common/framework/service/prdfTargetServices.C index 80084807e..a8045a2ed 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfTargetServices.C +++ b/src/usr/diag/prdf/common/framework/service/prdfTargetServices.C @@ -1099,6 +1099,44 @@ bool isDramWidthX4( TargetHandle_t i_mba ) i_mba->getAttr<ATTR_EFF_DRAM_WIDTH>() ); } +//------------------------------------------------------------------------------ + +uint8_t getRanksPerDimm( TargetHandle_t i_mba, uint8_t i_ds ) +{ + #define PRDF_FUNC "[PlatServices::getRanksPerDimm] " + + uint8_t rankCount = 0; // default if something fails + + do + { + if ( MAX_DIMM_PER_PORT <= i_ds ) + { + PRDF_ERR( PRDF_FUNC"Invalid parameters i_ds:%u", i_ds ); + break; + } + + // NOTE: Unable to use getAttr() because it is not able to return an + // array. Otherwise, all of the following would be able to fit in + // one line of code. The targeting may fix this later. + + ATTR_EFF_NUM_RANKS_PER_DIMM_type attr; + if ( !i_mba->tryGetAttr<ATTR_EFF_NUM_RANKS_PER_DIMM>(attr) ) + { + PRDF_ERR( PRDF_FUNC"failed to get ATTR_EFF_NUM_RANKS_PER_DIMM" ); + break; + } + + // Note that DIMMs are plugged in pairs so the rank numbers should be + // the same for each port. + rankCount = attr[0][i_ds]; + + } while(0); + + return rankCount; + + #undef PRDF_FUNC +} + //############################################################################## //## //## Clock specific functions @@ -1240,6 +1278,9 @@ bool areDramRepairsDisabled() bool mnfgSpareDramDeploy() { return isMnfgFlagSet( MNFG_FLAG_BIT_MNFG_TEST_DRAM_REPAIRS ); } +bool isMfgCeCheckingEnabled() +{ return isMnfgFlagSet( MNFG_FLAG_BIT_MNFG_IPL_MEMORY_CE_CHECKINGE ); } + } // end namespace PlatServices } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/framework/service/prdfTargetServices.H b/src/usr/diag/prdf/common/framework/service/prdfTargetServices.H index 8a20b967c..2c5db38d6 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfTargetServices.H +++ b/src/usr/diag/prdf/common/framework/service/prdfTargetServices.H @@ -331,6 +331,15 @@ int32_t getMbaDimm( TARGETING::TargetHandle_t i_dimmTarget, uint8_t & o_dimm ); */ bool isDramWidthX4(TARGETING::TargetHandle_t i_mbaTarget); +/** + * @brief Obtain ranks per DIMM select on an MBA. + * @param i_mbaTarget MBA target. + * @param i_ds DIMM select for DIMM. + * @return Number of ranks confgured per DIMM select. If internal function + * fails it will return 0. + */ +uint8_t getRanksPerDimm( TARGETING::TargetHandle_t i_mbaTarget, uint8_t i_ds ); + //############################################################################## //## //## Clock specific functions @@ -406,6 +415,11 @@ bool areDramRepairsDisabled(); */ bool mnfgSpareDramDeploy(); +/** + * @brief Returns the state of the MNFG_IPL_MEMORY_CE_CHECKINGE policy flag. + * @return TRUE if MNFG_IPL_MEMORY_CE_CHECKINGE is set, FALSE otherwise. + */ +bool isMfgCeCheckingEnabled(); } // end namespace PlatServices diff --git a/src/usr/diag/prdf/common/mnfgtools/prdfMfgThresholds.lst b/src/usr/diag/prdf/common/mnfgtools/prdfMfgThresholds.lst index 2fa9871f4..2b64ee1ed 100755 --- a/src/usr/diag/prdf/common/mnfgtools/prdfMfgThresholds.lst +++ b/src/usr/diag/prdf/common/mnfgtools/prdfMfgThresholds.lst @@ -16,3 +16,6 @@ P8EX_MFG_L3_COL_REPAIRS 0 P8CHIP_ONNODE_BUS_CES 1 P8CHIP_OFFNODE_BUS_CES 1 +# P8 Centaur Mba thresholds +CEN_MBA_RT_SOFT_CE_TH_ALGO 2 +CEN_MBA_IPL_SOFT_CE_TH_ALGO 2 diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule index 6bf4bd177..134282142 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_regs_NEST.rule @@ -821,3 +821,21 @@ capture group default; }; + ############################################################################ + # MBA Address Translate Control Register + ############################################################################ + + register MBA0_MBAXCR + { + name "MBU.MBS.ARB.RXLT.MBAXCR01Q"; + scomaddr 0x0201140B; + capture group never; + }; + + register MBA1_MBAXCR + { + name "MBU.MBS.ARB.RXLT.MBAXCR23Q"; + scomaddr 0x0201140C; + capture group never; + }; + diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaDataBundle_common.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaDataBundle_common.H index fe2eb5c4c..ecda1af5b 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaDataBundle_common.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaDataBundle_common.H @@ -83,9 +83,11 @@ class CenMbaDataBundleCommon : public DataBundle CenMbaDataBundleCommon( const CenMbaDataBundleCommon & ); const CenMbaDataBundleCommon & operator=( const CenMbaDataBundleCommon & ); - private: // instance variables + protected: // instance variables ExtensibleChip * iv_mbaChip; ///< This MBA chip + + private: // instance variables ExtensibleChip * iv_membChip; ///< The connected MEMBUF chip public: // instance variables diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H index 26cc84bdd..91ff82c90 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H @@ -33,6 +33,9 @@ PRDR_ERROR_SIGNATURE(StartVcmPhase1, 0xffff0000, "", "Starting VCM phase 1"); PRDR_ERROR_SIGNATURE(StartVcmPhase2, 0xffff0001, "", "Starting VCM phase 2"); PRDR_ERROR_SIGNATURE(StartDsdPhase1, 0xffff0002, "", "Starting DSD phase 1"); PRDR_ERROR_SIGNATURE(StartDsdPhase2, 0xffff0003, "", "Starting DSD phase 2"); +PRDR_ERROR_SIGNATURE(StartTpsPhase1, 0xffff0004, "", "Starting TPS phase 1"); +PRDR_ERROR_SIGNATURE(StartTpsPhase2, 0xffff0005, "", "Starting TPS phase 2"); +PRDR_ERROR_SIGNATURE(EndTpsPhase2, 0xffff0006, "", "TPS Phase 2 completed"); PRDR_ERROR_SIGNATURE(MaintUE, 0xffff0010, "", "Maintenance UE"); @@ -54,6 +57,11 @@ PRDR_ERROR_SIGNATURE(RdrRepairsUsed, 0xffff0043, "", PRDR_ERROR_SIGNATURE(RdrRepairUnavail, 0xffff0044, "", "RDR: Repairs needed but unavailable"); - +PRDR_ERROR_SIGNATURE(MnfgIplFail, 0xffff0050, "", + "MNFG IPL Internal failure"); +PRDR_ERROR_SIGNATURE(MnfgIplHardCE, 0xffff0051, "", "MNFG IPL hard CE"); +PRDR_ERROR_SIGNATURE(MnfgIplDramCTE, 0xffff0052, "", "MNFG IPL DRAM CTE"); +PRDR_ERROR_SIGNATURE(MnfgIplRankCTE, 0xffff0053, "", "MNFG IPL half-rank CTE"); +PRDR_ERROR_SIGNATURE(MnfgIplDsCTE, 0xffff0054, "", "MNFG IPL DIMM CTE"); #endif // __prdfCenMbaExtraSig_H diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C index 1e7a0ed7c..65628e581 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C @@ -100,5 +100,12 @@ int32_t CenMbaTdCtlrCommon::chipMarkCleanup() #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +bool CenMbaTdCtlrCommon::isInTdMode() +{ + return ( (NO_OP != iv_tdState) && (MAX_TD_STATE > iv_tdState) ); +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H index 5107dc7f5..097928fc3 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.H @@ -49,6 +49,26 @@ class ExtensibleChip; */ class CenMbaTdCtlrCommon { + protected: // constants, enums + + /** + * @brief Lists all possible states of TD controller + * @note These enums are used as array indexes to cv_cmdCompleteFuncs and + * the last entry will be used to get the size of the array. + */ + enum TdState + { + NO_OP = 0, ///< No TD procedures in place. + VCM_PHASE_1, ///< Verify Chip Mark phase 1. + VCM_PHASE_2, ///< Verify Chip Mark phase 2. + DSD_PHASE_1, ///< DRAM Spare Deploy phase 1. + DSD_PHASE_2, ///< DRAM Spare Deploy phase 2. + TPS_PHASE_1, ///< Two-Phase Scrub phase 1. + TPS_PHASE_2, ///< Two-Phase Scrub phase 2. + RANK_SCRUB, ///< Targetted fast scrub on a rank. + MAX_TD_STATE ///< The maximum number of TD states. + }; + public: // functions /** @@ -62,8 +82,8 @@ class CenMbaTdCtlrCommon * @param i_mbaChip An MBA chip. */ explicit CenMbaTdCtlrCommon( ExtensibleChip * i_mbaChip ) : - iv_mbaChip(i_mbaChip), iv_initialized(false), iv_rank(), iv_mark(), - iv_mssCmd(NULL) + iv_mbaChip(i_mbaChip), iv_initialized(false), iv_tdState(NO_OP), + iv_rank(), iv_mark(), iv_mssCmd(NULL) {} /** @brief Destructor */ @@ -140,6 +160,20 @@ class CenMbaTdCtlrCommon virtual int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) = 0; /** + * @brief Analyzes Tps Phase 1 results and moves state machine. + * @param io_sc The step code data struct. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + virtual int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) = 0; + + /** + * @brief Analyzes Tps Phase 2 results and moves state machine. + * @param io_sc The step code data struct. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + virtual int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) = 0; + + /** * @brief Starts VCM Phase 1. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -164,10 +198,22 @@ class CenMbaTdCtlrCommon virtual int32_t startDsdPhase2() = 0; /** + * @brief Starts Tps Phase 1. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + virtual int32_t startTpsPhase1() = 0; + + /** + * @brief Starts Tps Phase 2. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + virtual int32_t startTpsPhase2() = 0; + + /** * @return TRUE if currently running a targeted diagnositics procedure, * FALSE otherwise. */ - virtual bool isInTdMode() = 0; + virtual bool isInTdMode(); /** * @brief Calls the cleanupCmd() function of the command that had just @@ -195,6 +241,9 @@ class CenMbaTdCtlrCommon /** Indicates if TD controller is initialized. */ bool iv_initialized; + /** The targeted diagnostics state variable (see enum TdState). */ + TdState iv_tdState; + /** The current rank that is being targeted for diagnostics. */ CenRank iv_rank; diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.C new file mode 100755 index 000000000..58b664863 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.C @@ -0,0 +1,111 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** @file prdfCenMbaThresholds_common.C + * @brief Utility functions used to get specific Centaur thresholds. + */ + +// Framework includes +#include <prdfExtensibleChip.H> +#include <prdfMfgThresholds.H> +#include <prdfMfgThresholdMgr.H> +#include <prdfPlatServices.H> + +// Pegasus includes +#include <prdfCenMbaThresholds.H> +#include <prdfCenMemUtils.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +int32_t getMnfgMemCeTh( ExtensibleChip * i_mbaChip, const CenRank & i_rank, + uint16_t & o_cePerDram, uint16_t & o_cePerHalfRank, + uint16_t & o_cePerDimm ) +{ + #define PRDF_FUNC "[getMnfgMemCeTh] " + + int32_t o_rc = SUCCESS; + + do + { + // Get base threshold ( 2GB ). + uint8_t baseTh = getMnfgCeTh(); + + // A base threhold of 0 indicates there should be no thresholding. + if ( 0 == baseTh ) + { + o_cePerDram = o_cePerHalfRank = o_cePerDimm = + MfgThresholdMgr::INFINITE_LIMIT_THR; + break; + } + + // Get DRAM size + uint8_t size = 0; + o_rc = MemUtils::getDramSize( i_mbaChip, size ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "MemUtils::getDramSize() failed" ); + break; + } + + // Get number of ranks DIMM select. + uint8_t rankCount = getRanksPerDimm( i_mbaChip->GetChipHandle(), + i_rank.getDimmSlct() ); + if ( 0 == rankCount ) + { + PRDF_ERR( PRDF_FUNC "PlatServices::getRanksPerDimm() failed" ); + break; + } + + // Get number of allowed CEs. + uint8_t baseAllowed = baseTh - 1; + + // Calculate CEs per DRAM. + // The DRAM size is in MBAXCR[6:7], where 0 = 2Gb, 1 = 4Gb, 2 = 8Gb, + // and 3 = 16 Gb. So the allowed CEs per DRAM can be calculated with + // the following: + // perDram = base * 2^(MBAXCR[6:7]+1) * (9/16) + // or, perDram = (base << MBAXCR[6:7]+1) * (9/16) + uint32_t computeBase = (baseAllowed << (size+1)) * 9; + o_cePerDram = (computeBase + 8) / 16; + + // Calculate CEs per DIMM. + o_cePerDimm = ((computeBase * (2 + rankCount)) + 8) / 16; + + // Calculate CEs per half-rank. + // Same as perDimm where rankCount is 1; + o_cePerHalfRank = ((computeBase * (2 + 1)) + 8) / 16; + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +} // end namespace PRDF + diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.H new file mode 100755 index 000000000..4084ec614 --- /dev/null +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.H @@ -0,0 +1,57 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaThresholds_common.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __PRDF_CEN_MBA_COMMON_THRESHOLDS_H +#define __PRDF_CEN_MBA_COMMON_THRESHOLDS_H + +/** @file prdfCenMbaThresholds_common.H + * @brief Utility functions used to get specific Centaur thresholds. + */ + +#include <prdfThresholdResolutions.H> +#include <prdfCenAddress.H> + +namespace PRDF +{ + +class ExtensibleChip; + +/** + * @brief Returns number of allowed CEs for MNFG IPLs. + * @pre Must check if in manufacturing mode before calling this function. + * @post Must callout if the count is greater than the returned values. + * @param i_mbaChip MBA chip. + * @param i_rank The rank for which the threshold is needed. + * @param o_cePerDram CEs allowed per DRAM. + * @param o_cePerHalfRank CEs allowed per logical DIMM rank. + * @param o_cePerDimm CEs allowed per logical DIMM. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +int32_t getMnfgMemCeTh( ExtensibleChip * i_mbaChip, const CenRank & i_rank, + uint16_t & o_cePerDram, uint16_t & o_cePerHalfRank, + uint16_t & o_cePerDimm ); + +} // end namespace PRDF + +#endif /* __PRDF_CEN_MBA_COMMON_THRESHOLDS_H */ + diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C new file mode 100755 index 000000000..0701670cb --- /dev/null +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C @@ -0,0 +1,167 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** @file prdfCenMemUtils.C + * @brief Utility functions related to Centaur + */ + +#include <prdfCenMemUtils.H> +#include <prdfExtensibleChip.H> +#include <prdfCenMbaDataBundle.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; + +namespace PRDF +{ + +namespace MemUtils +{ + +using namespace PlatServices; + +const uint8_t CE_REGS_PER_MBA = 9; +const uint8_t SYMBOLS_PER_CE_REG = 8; + +static const char *mbsCeStatReg[][ CE_REGS_PER_MBA ] = { + { "MBA0_MBSSYMEC0", "MBA0_MBSSYMEC1","MBA0_MBSSYMEC2", + "MBA0_MBSSYMEC3", "MBA0_MBSSYMEC4", "MBA0_MBSSYMEC5", + "MBA0_MBSSYMEC6", "MBA0_MBSSYMEC7", "MBA0_MBSSYMEC8" }, + { "MBA1_MBSSYMEC0", "MBA1_MBSSYMEC1","MBA1_MBSSYMEC2", + "MBA1_MBSSYMEC3", "MBA1_MBSSYMEC4", "MBA1_MBSSYMEC5", + "MBA1_MBSSYMEC6", "MBA1_MBSSYMEC7", "MBA1_MBSSYMEC8" } + }; + +int32_t collectCeStats( ExtensibleChip *i_mbaChip, MaintSymbols &o_maintStats, + const CenRank & i_rank ) +{ + #define PRDF_FUNC "[MemUtils::collectCeStats] " + int32_t o_rc = SUCCESS; + do + { + TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle(); + CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip ); + ExtensibleChip * membufChip = mbadb->getMembChip(); + if ( NULL == membufChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed: MBA=0x%08x", + getHuid(mbaTrgt) ); + o_rc = FAIL; break; + } + uint8_t mbaPos = getTargetPosition( mbaTrgt ); + + for( uint8_t regIdx = 0 ; regIdx < CE_REGS_PER_MBA; regIdx++) + { + SCAN_COMM_REGISTER_CLASS * ceReg = membufChip->getRegister( + mbsCeStatReg[mbaPos][regIdx] ); + + if( NULL == ceReg ) + { + PRDF_ERR( PRDF_FUNC"getRegister() Failed for register:%s", + mbsCeStatReg[mbaPos][regIdx]); + break; + } + o_rc = ceReg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"%s Read() failed. Target=0x%08x", + mbsCeStatReg[mbaPos][regIdx], getHuid(mbaTrgt) ); + break; + } + uint8_t baseSymbol = SYMBOLS_PER_CE_REG*regIdx; + for(uint8_t i = 0 ; i < SYMBOLS_PER_CE_REG; i++) + { + uint8_t synCount = ceReg->GetBitFieldJustified( (i*8), 8 ); + + if ( 0 == synCount) + { + continue; + } + else + { + SymbolData symData; + symData.symbol = CenSymbol::fromSymbol( mbaTrgt, i_rank, + baseSymbol+i, CenSymbol::BOTH_SYMBOL_DQS ); + if ( !symData.symbol.isValid() ) + { + PRDF_ERR( PRDF_FUNC"CenSymbol() failed" ); + o_rc = FAIL; + break; + } + else + { + symData.count = synCount; + o_maintStats.push_back( symData ); + } + } + } + if( FAIL == o_rc) break; + } + if( FAIL == o_rc) break; + }while(0); + return o_rc; + #undef PRDF_FUNC +} + +int32_t getDramSize( ExtensibleChip *i_mbaChip, uint8_t & o_size ) +{ + #define PRDF_FUNC "[MemUtils::getDramSize] " + + int32_t o_rc = SUCCESS; + o_size = SIZE_2GB; + + do + { + TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle(); + CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip ); + ExtensibleChip * membufChip = mbadb->getMembChip(); + if ( NULL == membufChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed: MBA=0x%08x", + getHuid(mbaTrgt) ); + o_rc = FAIL; break; + } + + uint32_t pos = getTargetPosition(mbaTrgt); + const char * reg_str = (0 == pos) ? "MBA0_MBAXCR" : "MBA1_MBAXCR"; + + SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s. Target=0x%08x", + reg_str, getHuid(mbaTrgt) ); + break; + } + o_size = reg->GetBitFieldJustified( 6, 2 ); + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +} // end namespace MemUtils + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.H new file mode 100755 index 000000000..1b77c607a --- /dev/null +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.H @@ -0,0 +1,99 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef prdfCenMemUtils_H +#define prdfCenMemUtils_H + +/** @file prdfCenMemUtils.H + * @brief General utility functions for Centaur + */ + +//------------------------------------------------------------------------------ +// Includes +//------------------------------------------------------------------------------ + +#include <vector> +#include <prdfCenSymbol.H> + + + +namespace PRDF +{ + +class ExtensibleChip; + +namespace MemUtils +{ + +//============================================================================== +// Constants and Enums +//============================================================================== + +/** + * @brief Lists all possible sizes for a DRAM. + * @note These values must match the defintion of MBAXCR[6:7]. They are + * important for calculations. + */ +enum DramSize +{ + SIZE_2GB = 0, + SIZE_4GB = 1, + SIZE_8GB = 2, + SIZE_16GB = 3, +}; + +//============================================================================== +// Maintenance statistics +//============================================================================== + +/** + * @brief Collects CE symbol data. + */ +struct SymbolData +{ + CenSymbol symbol; + uint8_t count; + SymbolData() : count(0) {} +}; + +typedef std::vector<SymbolData> MaintSymbols; +/** + * @brief Checks CE stats on a MBA. + * @param i_mbaChip MBA chip. + * @param o_maintStats Output vector. Contains symbols with CE count. + * @param i_rank Rank for which stat collection will be done. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +int32_t collectCeStats( ExtensibleChip *i_mbaChip, MaintSymbols &o_maintStats, + const CenRank & i_rank ); +/** + * @brief Gets DRAM size for an MBA. + * @param i_mbaChip MBA chip. + * @param o_size size for a DRAM. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ +int32_t getDramSize( ExtensibleChip *i_mbaChip, uint8_t & o_size ); + +} // namespace MemUtils +} // end namespace PRDF +#endif diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C index 57eaf1322..5b1e0444b 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C @@ -245,4 +245,28 @@ int32_t CenSymbol::getSymbol( const CenRank & i_rank, WiringType i_wiringType, #undef PRDF_FUNC } +//------------------------------------------------------------------------------ + +int32_t CenSymbol::setPins( uint8_t i_pins ) +{ + #define PRDF_FUNC "[CenSymbol::setPins] " + int32_t o_rc = SUCCESS; + + do + { + if ( BOTH_SYMBOL_DQS < i_pins ) + { + PRDF_ERR( PRDF_FUNC"i_pins %u is invalid", i_pins ); + o_rc = FAIL; + break; + } + + iv_pins = i_pins; + + } while (0); + + return o_rc; + #undef PRDF_FUNC +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H index a43700e87..681bff7ee 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H @@ -34,6 +34,7 @@ #include <prdfCenAddress.H> #include <prdfCenConst.H> +#include <prdfPlatServices.H> #include <prdfTargetFwdRef.H> //############################################################################## @@ -152,6 +153,16 @@ class CenSymbol /** @return The DRAM index for this symbol. */ uint8_t getDram() const { return symbol2Dram( iv_symbol, iv_x4Dram ); } + /** @return rank associated with this symbol. */ + CenRank getRank() const { return iv_rank; }; + + /** + * @brief Sets this symbol's pin to a new set of bad pins. + * @param The new pins. + * @return Non-SUCCESS if an internal function failed, SUCCESS otherwise. + */ + int32_t setPins( uint8_t i_pins ); + /** @return TRUE this symbol is on a x4 DRAM, FALSE otherwise. */ bool isX4Dram() const { return iv_x4Dram; } @@ -171,20 +182,27 @@ class CenSymbol * @brief Overrides the '<' operator. * @param i_symbol The symbol to compare with. * @return TRUE if this symbol is less than i_symbol, FALSE otherwise. + * @note Compares against iv_symbol and iv_rank. There is currently no + * need to compare against iv_mbaTarget. */ bool operator < ( const CenSymbol & i_symbol ) const { - return ( this->getSymbol() < i_symbol.getSymbol() ); + return ( (iv_symbol < i_symbol.iv_symbol) || + ( (iv_symbol == i_symbol.iv_symbol) && + (iv_rank < i_symbol.iv_rank ) ) ); } /** * @brief Overrides the '==' operator. * @param i_symbol The symbol to compare with. * @return TRUE if the two symbols are equivalent, FALSE otherwise. + * @note Compares against iv_symbol and iv_rank. There is currently no + * need to compare against iv_mbaTarget. */ bool operator == ( const CenSymbol & i_symbol ) const { - return ( this->getSymbol() == i_symbol.getSymbol() ); + return ( (iv_symbol == i_symbol.iv_symbol) && + (iv_rank == i_symbol.iv_rank ) ); } private: // functions diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H index 1bf1d256c..77482bcc3 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H @@ -20,6 +20,7 @@ /* Origin: 30 */ /* */ /* IBM_PROLOG_END_TAG */ + #ifndef _PRDFLANEREPAIR_H #define _PRDFLANEREPAIR_H diff --git a/src/usr/diag/prdf/common/prd_pegasus.mk b/src/usr/diag/prdf/common/prd_pegasus.mk index daa68e9d7..026dd7228 100755 --- a/src/usr/diag/prdf/common/prd_pegasus.mk +++ b/src/usr/diag/prdf/common/prd_pegasus.mk @@ -50,4 +50,6 @@ prd_pegasus_specific = \ prdfLineDelete.o \ prdfMemoryMru.o \ prdfPegasusConfigurator.o \ + prdfCenMbaThresholds_common.o \ + prdfCenMemUtils.o \ prdfRegisterData.o diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H index c6dcf5ccc..2c2ba1237 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H @@ -31,6 +31,7 @@ #include <prdfCenMbaDataBundle_common.H> #include <diag/mdia/mdia.H> +#include <prdfCenMbaIplCeStats.H> //------------------------------------------------------------------------------ @@ -50,13 +51,27 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon */ explicit CenMbaDataBundle( ExtensibleChip * i_mbaChip ) : CenMbaDataBundleCommon(i_mbaChip), iv_sendCmdCompleteMsg(false), - iv_cmdCompleteMsgData() + iv_cmdCompleteMsgData(), iv_iplCeStats(NULL) {} /** * @brief Destructor. */ - ~CenMbaDataBundle() {} + ~CenMbaDataBundle() + { + delete iv_iplCeStats; iv_iplCeStats = NULL; + } + + /** @return The IPL CE statistics object. */ + CenMbaIplCeStats * getIplCeStats() + { + if ( NULL == iv_iplCeStats ) + { + iv_iplCeStats = new CenMbaIplCeStats( iv_mbaChip ); + } + + return iv_iplCeStats; + } private: // functions @@ -74,6 +89,10 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon */ MDIA::MaintCommandEventType iv_cmdCompleteMsgData; + private: // instance variables + + CenMbaIplCeStats * iv_iplCeStats; ///< MNFG IPL CE statistics object + }; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C new file mode 100755 index 000000000..1b3541865 --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C @@ -0,0 +1,506 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** @file prdfCenMbaIplCeStats.C + * @brief Contains IPL CE related code. + */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfEnums.H> +#include <prdfErrlUtil.H> +#include <prdfExtensibleChip.H> +#include <prdfGlobal.H> +#include <prdfPfa5Data.h> +#include <prdf_service_codes.H> + +// Pegasus includes +#include <prdfCenMbaExtraSig.H> +#include <prdfCenMbaIplCeStats.H> +#include <prdfCenMbaThresholds.H> +#include <prdfCenMemUtils.H> +#include <prdfMemoryMru.H> +#include <prdfPlatCalloutUtil.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; +using namespace HWAS; + +//------------------------------------------------------------------------------ + +void CenMbaIplCeStats::banAnalysis( const CenRank & i_rank ) +{ + + for ( uint8_t i = 0; i < MAX_PORT_PER_MBA; i++ ) + { + HalfRankKey banKey = { i_rank, i }; + iv_bannedAnalysis[banKey] = true; + } +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::banAnalysis( const CenRank & i_rank, + uint8_t i_portSlct ) +{ + int32_t o_rc = SUCCESS; + + do + { + if ( i_portSlct >= MAX_PORT_PER_MBA ) + { + PRDF_ERR("[banAnalysis] i_portSlct (0x%02x) is invalid", + i_portSlct ); + o_rc = FAIL; + break; + } + + HalfRankKey banKey = { i_rank, i_portSlct }; + iv_bannedAnalysis[banKey] = true; + + } while (0); + + return o_rc; +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::collectStats( const CenRank & i_stopRank ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::collectStats] " + int32_t o_rc = SUCCESS; + do + { + MemUtils::MaintSymbols symData; + o_rc = MemUtils::collectCeStats( iv_mbaChip, symData, i_stopRank); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", + getHuid( iv_mbaChip->GetChipHandle() ) ); + break; + } + + // if size of stats collected is zero, it may mean some symbol + // has gone beyond maximum value. But this is only valid for DD1 + // and has a very low probability. So ignoring this case. + + for ( uint32_t i = 0; i < symData.size(); i++ ) + { + uint8_t dimmSlct = i_stopRank.getDimmSlct(); + uint8_t dram = symData[i].symbol.getDram(); + uint8_t portSlct = symData[i].symbol.getPortSlct(); + + // Check if analysis is banned. + HalfRankKey banKey = { i_stopRank, portSlct }; + if ( iv_bannedAnalysis[banKey] ) + continue; + + // Update iv_ceSymbols with the new symbol data. + SymbolKey symkey = { symData[i].symbol }; + iv_ceSymbols.push_back (symkey ); + + // Increment the soft CEs per DRAM. + DramKey dramKey = { i_stopRank, dram, portSlct }; + iv_dramMap[dramKey]++; + + // Increment the soft CEs per half rank. + HalfRankKey rankKey = { i_stopRank, portSlct }; + iv_rankMap[rankKey]++; + + // In case of dimm Slct , rank select does not matter + CenRank dimmRank( dimmSlct, 0); + // Increment the soft CEs per half dimm select. + HalfRankKey dsKey = { dimmRank, portSlct }; + iv_dsMap[dsKey]++; + } + + } while (0); + + // We have to clear all stats before giving control back to MDIA.. + // This is done by setting up MBSTRQ[53] bit + // We are doing cleanup in TdController code, + // So not clearing up stats here. + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::analyzeStats( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "CenMbaIplCeStats::analyzeStats " + int32_t o_rc = SUCCESS; + + o_callOutsMade = false; + + do + { + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + o_rc = calloutCePerDram( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC" calloutCePerDram() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + o_rc = calloutCePerRank( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"calloutCePerRank() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + o_rc = calloutCePerDs( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC" calloutCePerDs() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + } while (0); + + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutHardCes( const CenRank & i_stopRank ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutHardCes] " + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + int32_t o_rc = SUCCESS; + do + { + MemUtils::MaintSymbols symData; + o_rc = MemUtils::collectCeStats( iv_mbaChip, symData, i_stopRank); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", + getHuid( iv_mbaChip->GetChipHandle() ) ); + break; + } + + for ( uint32_t i = 0; i < symData.size(); i++ ) + { + uint8_t portSlct = symData[i].symbol.getPortSlct(); + + // Check if analysis is banned. + HalfRankKey banKey = { i_stopRank, portSlct }; + + bool& isBanned = iv_bannedAnalysis[banKey]; + + if ( isBanned ) + continue; + + // At this point a hard CE was found, callout the symbol. + MemoryMru memMru ( mbaTrgt, symData[i].symbol.getRank(), + symData[i].symbol ); + + // We are creating and committing error log here. It is different + // from rest of attention flow. We could have set the callout + // values in sdc but it would have created confusion in ffdc if + // we also get vcm/ue at same time. + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplHardCE, 0); + addMruAndCommitErrl( memMru, l_errl); + + // Ban the half rank. + isBanned = true; + } + }while(0); + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerDram( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDram] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerDramMap::iterator dramIter = iv_dramMap.begin(); + dramIter != iv_dramMap.end(); dramIter++ ) + { + // First, check if this half rank is banned from analysis. + HalfRankKey banKey = { dramIter->first.rank, + dramIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // the an entry if one does not exist, so used find() instead to check + // for existence in the map. + if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per DRAM threshold. + uint16_t dramTh, junk0, junk1; + o_rc = getMnfgMemCeTh( iv_mbaChip, dramIter->first.rank, dramTh, + junk0, junk1 ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_dsMap. + if ( dramIter->second <= dramTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this dram. + for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (dramIter->first.rank == symIter->symbol.getRank() ) && + (dramIter->first.dram == symIter->symbol.getDram() ) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplDramCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + + // Ban the half rank. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurance. + break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerRank( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerRank] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerHalfRankMap::iterator rankIter = iv_rankMap.begin(); + rankIter != iv_rankMap.end(); rankIter++ ) + { + // First, check if this half rank is banned from analysis. + HalfRankKey banKey = { rankIter->first.rank, + rankIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // the an entry if one does not exist, so used find() instead to check + // for existence in the map. + if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per rank threshold. + uint16_t junk0, rankTh, junk1; + o_rc = getMnfgMemCeTh( iv_mbaChip, rankIter->first.rank, junk0, + rankTh, junk1 ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_rankMap. + if ( rankIter->second <= rankTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this rank. + for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (rankIter->first.rank == symIter->symbol.getRank() ) && + (rankIter->first.portSlct == + symIter->symbol.getPortSlct()) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplRankCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + // Ban the half rank. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurance. + break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerDs( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDs] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerHalfDsMap::iterator dsIter = iv_dsMap.begin(); + dsIter != iv_dsMap.end(); dsIter++ ) + { + // First, check if this half dimm select is banned from analysis. + HalfRankKey banKey = { dsIter->first.rank, + dsIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // the an entry if one does not exist, so used find() instead to check + // for existence in the map. + if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per dimm select threshold. + uint16_t junk0, junk1, dsTh; + o_rc = getMnfgMemCeTh( iv_mbaChip, dsIter->first.rank, junk0, + junk1, dsTh ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_dsMap. + if ( dsIter->second <= dsTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this dimm select. + for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (dsIter->first.rank.getDimmSlct() == + symIter->symbol.getRank().getDimmSlct() ) + && (dsIter->first.portSlct == + symIter->symbol.getPortSlct()) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid(mbaTrgt), + 0, PRDFSIG_MnfgIplDsCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + // Ban the half dimm select. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurance. + break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +void CenMbaIplCeStats::addMruAndCommitErrl( const MemoryMru & i_memmru, + errlHndl_t i_errl ) +{ + // Add MemoryMru callouts and FFDC + CalloutUtil::calloutMemoryMru( i_errl, i_memmru, + SRCI_PRIORITY_HIGH, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_Predictive ); + + // Add traces + i_errl->collectTrace( PRDF_COMP_NAME, 512 ); + + // Commit the error log + ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID ); +} + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H new file mode 100755 index 000000000..af358dd6b --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H @@ -0,0 +1,230 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef PRDF_CEN_MBA_IPL_CE_STATS_H +#define PRDF_CEN_MBA_IPL_CE_STATS_H + +/** @file prdfCenMbaIplCeStats.H */ + +//------------------------------------------------------------------------------ +#include <prdfExtensibleChip.H> +#include <prdfCenAddress.H> +#include <prdfCenSymbol.H> +#include <prdfCenAddress.H> +#include <map> + +namespace PRDF +{ +class ExtensibleChip; +class MemoryMru; +//------------------------------------------------------------------------------ + +/** + * This class is used for storing the CE statistics that are gathered during a + * manufacturing mode IPL for MDIA analysis. Only one instance of this object is + * meant to be used for each MBA and is stored in its data bundle. + * It is expected that when the IPL memory diagnostics is complete, MDIA will + * call the appropriate function to tell this object to analyze all statistics + * that were collected during the IPL. + */ +class CenMbaIplCeStats +{ + public: + + /** + * @brief Constructor + * @param i_mbaChip The MBA chip. + */ + explicit CenMbaIplCeStats( ExtensibleChip * i_mbaChip ) + :iv_mbaChip(i_mbaChip) {} + + /** + * @brief Destructor + */ + ~CenMbaIplCeStats() {} + + /** + * @brief Bans analysis of the given rank. + * @param i_rank The rank to ban. + */ + void banAnalysis( const CenRank & i_rank ); + + /** + * @brief Bans analysis of the given half rank. + * @param i_rank The rank. + * @param i_portSlct The port select. + * @return Non-SUCCESS if the parameters are invalid, SUCCESS otherwise. + */ + int32_t banAnalysis( const CenRank & i_rank, uint8_t i_portSlct ); + + /** + * @brief Will collect all the maintenance statistics and store them for + * analysis. Will also clear the scrub statistics counters when + * collection is complete. + * @param i_rank The rank the maintenance command stopped on. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t collectStats( const CenRank & i_rank ); + + /** + * @brief Analyzes the maintenance statistics that were gathered, making + * the appropriate callouts. + * @param o_callOutsDone TRUE if PRD made a hardware callout, FALSE + * otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t analyzeStats ( bool & o_callOutsDone ); + + /** @brief Calls out all symbols with hard CEs. + * @param i_stopRank The rank the maintenance command stopped on. + */ + int32_t calloutHardCes( const CenRank & i_stopRank ); + + private: // enums, structs, typedefs + + /** @brief The key type for each entry in iv_symMap. */ + struct SymbolKey + { + CenSymbol symbol; ///< The failing symbol + + /** @brief Overrides the '==' operator. */ + bool operator==( const SymbolKey & i ) const + { return ( symbol == i.symbol ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const SymbolKey & i ) const + { return (symbol < i.symbol ); } + }; + + + /** @brief The key to identify half rank. + * This key will be used to ban analysis, rank and dimm + * threshold analysis + */ + struct HalfRankKey + { + CenRank rank; ///< The rank + uint8_t portSlct; ///< The port select + + /** @brief Overrides the '==' operator. */ + bool operator==( const HalfRankKey & i ) const + { return ( (rank == i.rank) && (portSlct == i.portSlct) ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const HalfRankKey & i ) const + { + return ( (rank < i.rank) || + ((rank == i.rank) && (portSlct < i.portSlct)) ); + } + }; + + /** @brief The key type for each entry in iv_dramMap. */ + struct DramKey + { + CenRank rank; ///< The rank + uint8_t dram; ///< The DRAM (x8:0-17 x4:0-35) + uint8_t portSlct; ///< The port select (0-1) + + // Techinally, the port select can be derived from the DRAM value, + // however, it simplifies things to just store the port select here. + // Therefore, the port select does not need to be used in operator==() + // or operators<(). + + /** @brief Overrides the '==' operator. */ + bool operator==( const DramKey & i ) const + { return ( (rank == i.rank) && (dram == i.dram) ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const DramKey & i ) const + { return ( (rank < i.rank) || ((rank == i.rank) && (dram < i.dram)) ); } + }; + + // data type to collect all symbol statistics + typedef std::vector<SymbolKey> CESymbols; + + // data type to collect dimm specific statitics. While + // filling up data for this data type, we should ignore rank select + // and only consider dimm slct. + typedef std::map<HalfRankKey, uint32_t> CePerHalfDsMap; + // data type to collect all symbol statistics for a rank. + typedef std::map<HalfRankKey, uint32_t> CePerHalfRankMap; + // data type to store banned half ranks on which analysis is not required. + typedef std::map<HalfRankKey, bool> BannedAnalysisMap; + // data type to collect all symbol statistics for a dram. + typedef std::map<DramKey, uint32_t> CePerDramMap; + + private: // functions + + /** @brief Calls out all symbols on a dram that has exceeded threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE othewise. + * @return Non-SUCCESS if an internal function fails, SUCCESS othewise. + */ + int32_t calloutCePerDram( bool & o_callOutsMade ); + + /** @brief Calls out all symbols on a rank that has exceeded threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE othewise. + * @return Non-SUCCESS if an internal function fails, SUCCESS othewise. + */ + int32_t calloutCePerRank( bool & o_callOutsMade ); + + /** @brief Calls out all symbols on a dimm select that has exceeded + * threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE othewise. + * @return Non-SUCCESS if an internal function fails, SUCCESS othewise. + */ + int32_t calloutCePerDs( bool & o_callOutsMade ); + + /** + * @brief Add MemoryMru callout to error log and commit it. + * @param i_memmru Memory MRU. + * @param i_errl Error log. + */ + void addMruAndCommitErrl( const MemoryMru & i_memmru, errlHndl_t i_errl ); + + private: // instance variables + + /** The MBA chip. */ + ExtensibleChip * iv_mbaChip; + + /** The rank that this object is expecting to collect for. */ + CenRank iv_rankToAnalyze; + + /** A map to keep track of which half ranks have all ready been called out. + * This helps reduce excessive callouts for the same hardware. */ + BannedAnalysisMap iv_bannedAnalysis; + + /** A vector containing all data for every failing symbol. */ + CESymbols iv_ceSymbols; + + /** A map containing count for every failing dimm select. */ + CePerHalfDsMap iv_dsMap; + + /** A map containing count for every failing rank. */ + CePerHalfRankMap iv_rankMap; + + /** A map containing count for every failing DRAM. */ + CePerDramMap iv_dramMap; +}; + +} //end namespace PRDF +#endif /* PRDF_CEN_MBA_IPL_CE_STATS_H */ diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C index e329f15df..aada91165 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C @@ -52,24 +52,30 @@ using namespace PlatServices; enum EccErrorMask { - NO_ERROR = 0, ///< No ECC errors found - UE = 0x80, ///< UE - MPE = 0x40, ///< Chip mark placed - RCE = 0x20, ///< Retry CE - MCE = 0x10, ///< CE on chip mark + NO_ERROR = 0, ///< No ECC errors found + UE = 0x01, ///< UE + MPE = 0x02, ///< Chip mark placed + MCE = 0x04, ///< CE on chip mark + HARD_CTE = 0x08, ///< Hard CE threshold exceeed + SOFT_CTE = 0x10, ///< Soft CE threshold exceeed + INTER_CTE = 0x20, ///< Intermittent CE threshold exceeed + RETRY_CTE = 0x40, ///< Retry CE threshold exceeed }; //------------------------------------------------------------------------------ // Class Variables //------------------------------------------------------------------------------ -CenMbaTdCtlr::CMD_COMPLETE_FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = +CenMbaTdCtlr::FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = { &CenMbaTdCtlr::analyzeCmdComplete, // NO_OP &CenMbaTdCtlr::analyzeVcmPhase1, // VCM_PHASE_1 &CenMbaTdCtlr::analyzeVcmPhase2, // VCM_PHASE_2 &CenMbaTdCtlr::analyzeDsdPhase1, // DSD_PHASE_1 &CenMbaTdCtlr::analyzeDsdPhase2, // DSD_PHASE_2 + &CenMbaTdCtlr::analyzeTpsPhase1, // TPS_PHASE_1 + &CenMbaTdCtlr::analyzeTpsPhase2, // TPS_PHASE_2 + NULL, // RANK_SCRUB }; //------------------------------------------------------------------------------ @@ -109,6 +115,13 @@ int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) break; } + if ( NULL == cv_cmdCompleteFuncs[iv_tdState] ) + { + PRDF_ERR( PRDF_FUNC"Function for state %d not supported", + iv_tdState ); + o_rc = FAIL; break; + } + o_rc = (this->*cv_cmdCompleteFuncs[iv_tdState])( io_sc ); if ( SUCCESS != o_rc ) { @@ -206,8 +219,6 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) int32_t o_rc = SUCCESS; - TargetHandle_t mba = iv_mbaChip->GetChipHandle(); - do { if ( NO_OP != iv_tdState ) @@ -227,7 +238,7 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) iv_rank = CenRank( addr.getRank() ); // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -247,30 +258,28 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) } else if ( eccErrorMask & MPE ) { - // Get the current marks in hardware. - o_rc = mssGetMarkStore( mba, iv_rank, iv_mark ); + o_rc = handleMPE( io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed"); + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); break; } - - if ( !iv_mark.getCM().isValid() ) - { - PRDF_ERR( PRDF_FUNC"No valid chip mark to verify"); - o_rc = FAIL; break; - } - - io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 ); - - CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); - - // Start VCM procedure - o_rc = startVcmPhase1(); - if ( SUCCESS != o_rc ) + } + else if ( isMfgCeCheckingEnabled() ) + { + // During MNFG IPL CE, we will get this condition. + // During SF read, all CE are reported as Hard CE. + // So we will only check for Hard CE threshold. + if ( eccErrorMask & HARD_CTE ) { - PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" ); - break; + io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase1 ); + // Start TPS Phase 1 + o_rc = startTpsPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startTpsPhase1() failed" ); + break; + } } } else @@ -305,7 +314,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -313,7 +322,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) break; } - if ( (eccErrorMask & UE) || (eccErrorMask & RCE) ) + if ( (eccErrorMask & UE) || (eccErrorMask & RETRY_CTE) ) { // Handle UE. Highest priority o_rc = handleUE( io_sc ); @@ -364,7 +373,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -448,7 +457,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc) { @@ -456,7 +465,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) break; } - if ( ( eccErrorMask & UE) || ( eccErrorMask & RCE ) ) + if ( ( eccErrorMask & UE) || ( eccErrorMask & RETRY_CTE ) ) { // Handle UE. Highest priority o_rc = handleUE( io_sc ); @@ -473,7 +482,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); // Start DSD Phase 2 - startDsdPhase2(); + o_rc = startDsdPhase2(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"startDsdPhase2() failed" ); @@ -507,7 +516,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -567,6 +576,144 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +int32_t CenMbaTdCtlr::analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase1] " + + int32_t o_rc = SUCCESS; + + do + { + if ( TPS_PHASE_1 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + + o_rc = mbadb->getIplCeStats()->collectStats( iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"collectStats() failed"); + break; + } + + // Get error condition which caused command to stop + uint16_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( ( eccErrorMask & UE ) || ( eccErrorMask & RETRY_CTE )) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MPE ) + { + o_rc = handleMPE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); + break; + } + } + else + { + // Start TPS Phase 2 + io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase2 ); + o_rc = startTpsPhase2(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startTpsPhase2() failed" ); + break; + } + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase2] " + + int32_t o_rc = SUCCESS; + + do + { + if ( TPS_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + + o_rc = mbadb->getIplCeStats()->calloutHardCes( iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"calloutHardCes() failed"); + break; + } + + // Get error condition which caused command to stop + uint16_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( ( eccErrorMask & UE ) || ( eccErrorMask & RETRY_CTE )) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MPE ) + { + o_rc = handleMPE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); + break; + } + } + else + { + io_sc.service_data->SetErrorSig( PRDFSIG_EndTpsPhase2 ); + iv_tdState = NO_OP; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + int32_t CenMbaTdCtlr::startVcmPhase1() { #define PRDF_FUNC "[CenMbaTdCtlr::startVcmPhase1] " @@ -587,8 +734,7 @@ int32_t CenMbaTdCtlr::startVcmPhase1() } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, @@ -635,8 +781,7 @@ int32_t CenMbaTdCtlr::startVcmPhase2() } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, @@ -691,8 +836,7 @@ int32_t CenMbaTdCtlr::startDsdPhase1() } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, @@ -739,8 +883,7 @@ int32_t CenMbaTdCtlr::startDsdPhase2() } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, @@ -767,14 +910,119 @@ int32_t CenMbaTdCtlr::startDsdPhase2() //------------------------------------------------------------------------------ -bool CenMbaTdCtlr::isInTdMode() +int32_t CenMbaTdCtlr::startTpsPhase1() { - return ( (NO_OP != iv_tdState) && (MAX_TD_STATE > iv_tdState) ); + #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase1] " + + int32_t o_rc = SUCCESS; + + iv_tdState = TPS_PHASE_1; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // We are using current state as input parameter in mnfgCeSetup. + // So it is mandatory to set iv_tdState before calling this function. + o_rc = mnfgCeSetup(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" ); + break; + } + + // Start phase 1. + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC } //------------------------------------------------------------------------------ -int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask ) +int32_t CenMbaTdCtlr::startTpsPhase2() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase2] " + + int32_t o_rc = SUCCESS; + + iv_tdState = TPS_PHASE_2; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // We are using current state as input parameter in mnfgCeSetup. + // So it is mandatory to set iv_tdState before calling this function. + o_rc = mnfgCeSetup(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" ); + break; + } + + // Start phase 2. + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::checkEccErrors( uint16_t & o_eccErrorMask ) { #define PRDF_FUNC "[CenMbaTdCtlr::checkEccErrors] " @@ -821,7 +1069,20 @@ int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask ) if ( mbsEccFir->IsBitSet(38) ) o_eccErrorMask |= MCE; if ( mbsEccFir->IsBitSet(41) ) o_eccErrorMask |= UE; - if ( mbsEccFir->IsBitSet(42) ) o_eccErrorMask |= RCE; + + SCAN_COMM_REGISTER_CLASS * mbaSpaFir = + iv_mbaChip->getRegister("MBASPA"); + o_rc = mbaSpaFir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Failed to read MBASPA Regsiter"); + break; + } + + if ( mbaSpaFir->IsBitSet(1) ) o_eccErrorMask |= HARD_CTE; + if ( mbaSpaFir->IsBitSet(2) ) o_eccErrorMask |= SOFT_CTE; + if ( mbaSpaFir->IsBitSet(3) ) o_eccErrorMask |= INTER_CTE; + if ( mbaSpaFir->IsBitSet(4) ) o_eccErrorMask |= RETRY_CTE; } while(0); @@ -846,6 +1107,7 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) io_sc.service_data->SetServiceCall(); TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); do { @@ -892,6 +1154,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) } callouts.insert( callouts.end(), dimms.begin(), dimms.end() ); + + if ( isMfgCeCheckingEnabled() ) + { + // As we are doing callout for UE, we dont need to do callout + // during CE for this rank on given port + mbadb->getIplCeStats()->banAnalysis( iv_rank, ps ); + } } if ( SUCCESS != o_rc ) break; @@ -909,6 +1178,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" ); o_rc = FAIL; break; } + + if ( isMfgCeCheckingEnabled() ) + { + // As we are doing callout for UE, we dont need to do callout + // during CE for this rank on both port + mbadb->getIplCeStats()->banAnalysis( iv_rank); + } } // Callout all DIMMs in the list. @@ -927,6 +1203,50 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +int32_t CenMbaTdCtlr::handleMPE( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleMPE] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + // Get the current marks in hardware. + o_rc = mssGetMarkStore( mba, iv_rank, iv_mark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed"); + break; + } + + if ( !iv_mark.getCM().isValid() ) + { + PRDF_ERR( PRDF_FUNC"No valid chip mark to verify"); + o_rc = FAIL; break; + } + + io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Start VCM procedure + o_rc = startVcmPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} +//------------------------------------------------------------------------------ + int32_t CenMbaTdCtlr::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[CenMbaTdCtlr::handleMCE_VCM2] " @@ -1285,6 +1605,20 @@ int32_t CenMbaTdCtlr::prepareNextCmd() break; } + SCAN_COMM_REGISTER_CLASS * spaAnd = + iv_mbaChip->getRegister("MBASPA_AND"); + spaAnd->setAllBits(); + + // clear threshold exceeded attentions + spaAnd->SetBitFieldJustified( 1, 4, 0 ); + + o_rc = spaAnd->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on MBASPA_AND" ); + o_rc = FAIL; break; + } + } while (0); return o_rc; @@ -1338,5 +1672,69 @@ int32_t CenMbaTdCtlr::signalMdiaCmdComplete() #undef PRDF_FUNC } +// Do the setup for mnfg IPL CE +int32_t CenMbaTdCtlr::mnfgCeSetup() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::mnfgCeSetup] " + + int32_t o_rc = SUCCESS; + + do + { + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + ExtensibleChip * membChip = mbadb->getMembChip(); + if ( NULL == membChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); + o_rc = FAIL; break; + } + + uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() ); + + const char * reg_str = ( 0 == mbaPos ) ? "MBA0_MBSTR" : "MBA1_MBSTR"; + SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str ); + o_rc = mbstr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str ); + break; + } + + if ( TPS_PHASE_1 == iv_tdState ) + { + // Enable per-symbol error counters to count soft CEs + mbstr->SetBit(55); + mbstr->SetBit(56); + // Disable per-symbol error counters to count hard CEs + mbstr->ClearBit(57); + } + else if ( TPS_PHASE_2 == iv_tdState ) + { + // Disable per-symbol error counters to count soft CEs + mbstr->ClearBit(55); + mbstr->ClearBit(56); + // Enable per-symbol error counters to count hard CEs + mbstr->SetBit(57); + } + else + { + PRDF_ERR( PRDF_FUNC"Inavlid State:%u", iv_tdState ); + o_rc = FAIL; break; + } + + o_rc = mbstr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H index 807211e30..fd4ffd015 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H @@ -40,24 +40,8 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon { private: // constants, enums - /** - * @brief Lists all possible states of TD controller - * @note These enums are used as array indexes to cv_cmdCompleteFuncs and - * the last entry will be used to get the size of the array. - */ - enum TdState - { - NO_OP = 0, ///< No TD procedures in place. - VCM_PHASE_1, ///< Verify Chip Mark phase 1. - VCM_PHASE_2, ///< Verify Chip Mark phase 2. - DSD_PHASE_1, ///< DRAM Spare Deploy phase 1. - DSD_PHASE_2, ///< DRAM Spare Deploy phase 2. - MAX_TD_STATE ///< The maximum number of TD states. - }; - // Function pointers for maintenance command complete events. - typedef int32_t (CenMbaTdCtlr::*CMD_COMPLETE_FUNCS) - ( STEP_CODE_DATA_STRUCT & io_sc ); + typedef int32_t (CenMbaTdCtlr::*FUNCS)( STEP_CODE_DATA_STRUCT & io_sc ); public: // functions @@ -72,7 +56,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon * @param i_mbaChip An MBA chip. */ explicit CenMbaTdCtlr( ExtensibleChip * i_mbaChip ) : - CenMbaTdCtlrCommon(i_mbaChip), iv_tdState(NO_OP) + CenMbaTdCtlrCommon(i_mbaChip) {} public: // Overloaded functions @@ -88,13 +72,15 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ); int32_t analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ); int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ); + int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ); + int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ); int32_t startVcmPhase1(); int32_t startVcmPhase2(); int32_t startDsdPhase1(); int32_t startDsdPhase2(); - - bool isInTdMode(); + int32_t startTpsPhase1(); + int32_t startTpsPhase2(); private: // functions @@ -104,7 +90,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon * occurred. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - int32_t checkEccErrors( uint8_t & o_eccErrorMask ); + int32_t checkEccErrors( uint16_t & o_eccErrorMask ); /** * @brief Handle UEs during TD analysis. @@ -114,6 +100,14 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t handleUE( STEP_CODE_DATA_STRUCT & io_sc ); /** + * @brief Handle MPE event + * @param io_sc Service data collector. + * @note This will start VCM phase 1. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t handleMPE( STEP_CODE_DATA_STRUCT & io_sc ); + + /** * @brief Handle MCE event during VCM Phase 2 * @param io_sc Service data collector. * @note This will update bad bits information in VPD, set callouts, and @@ -155,16 +149,21 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon */ int32_t signalMdiaCmdComplete(); - private: // instance variables + /** + * @brief Does mnfg setup for CE threshold. + * @note Before calling this function, set current state to new + * value (TPS_PHASE_1/ TPS_PHASE_2). + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t mnfgCeSetup(); - /** The targeted diagnostics state variable (see enum TdState). */ - TdState iv_tdState; + private: // instance variables /** Array of functions pointers for TD controller states. This is used to * determine the next course of action after a maintenance command complete * attention. */ - static CMD_COMPLETE_FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE]; + static FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE]; }; // CenMbaTdCtlr diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H new file mode 100755 index 000000000..45fdd3282 --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H @@ -0,0 +1,54 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __PRDF_CEN_MBA_THRESHOLDS_H +#define __PRDF_CEN_MBA_THRESHOLDS_H + +/** @file prdfCenMbaThresholds.H + * @brief Utility functions used to get specific Centaur thresholds + * during IPL time. + */ + +#include <prdfCenMbaThresholds_common.H> +#include <prdfMfgThresholdMgr.H> +#include <prdfMfgThresholds.H> + +namespace PRDF +{ + +class ExtensibleChip; + +/** + * @brief Returns the manufacturing memory CE thresholds Per 2GB ( base ). + */ +inline uint8_t getMnfgCeTh() +{ + return MfgThresholdMgr::getInstance()-> + getThreshold( PRDF_CEN_MBA_IPL_SOFT_CE_TH_ALGO ); + +} + +} // end namespace PRDF + +#endif /* __PRDF_CEN_MBA_THRESHOLDS_H */ + diff --git a/src/usr/diag/prdf/prdfMain.C b/src/usr/diag/prdf/prdfMain.C index 94bebcb31..79806b071 100755 --- a/src/usr/diag/prdf/prdfMain.C +++ b/src/usr/diag/prdf/prdfMain.C @@ -30,10 +30,19 @@ */ #include <prdfMain.H> +#include <prdfCenMbaDataBundle.H> +#include <prdfExtensibleChip.H> +#include <prdfErrlUtil.H> +#include <prdfPlatServices.H> + +using namespace TARGETING; +using namespace HWAS; namespace PRDF { +using namespace PlatServices; + //------------------------------------------------------------------------------ // Platform specific helper function for PRDF::initialize() //------------------------------------------------------------------------------ @@ -43,4 +52,55 @@ void initPlatSpecific() // Currently no-op in Hostboot. } +//------------------------------------------------------------------------------ +// External functions - declared in prdfMain.H +//------------------------------------------------------------------------------ + +int32_t analyzeIplCEStats( TargetHandle_t i_mba, bool &o_calloutMade ) +{ + #define PRDF_FUNC "PRDF::analyzeIplCEStats" + + PRDF_ENTER( PRDF_FUNC"(0x%08x)", getHuid(i_mba) ); + + int32_t o_rc = SUCCESS; + o_calloutMade = false; + + ExtensibleChip * mbaChip = (ExtensibleChip *)systemPtr->GetChip( i_mba ); + CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip ); + + o_rc = mbadb->getIplCeStats()->analyzeStats( o_calloutMade ); + + if ( SUCCESS != o_rc ) + { + PRDF_ERR( "["PRDF_FUNC"] analyzeStats() failed"); + + // Get user data + uint64_t ud12 = PRDF_GET_UINT64_FROM_UINT32( getHuid(i_mba), 0 ); + uint64_t ud34 = PRDF_GET_UINT64_FROM_UINT32( PRDFSIG_MnfgIplFail, 0 ); + + // Create error log + errlHndl_t errl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, // severity + PRDF_MNFG_IPL_CE_ANALYSIS, // module ID + PRDF_DETECTED_FAIL_SOFTWARE, // reason code + ud12, ud34 ); // user data 1-4 + + // Add 2nd level support + errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH ); + + // Add traces + errl->collectTrace( PRDF_COMP_NAME, 512 ); + + // Commit the error log + ERRORLOG::errlCommit( errl, PRDF_COMP_ID ); + } + + PRDF_EXIT( PRDF_FUNC"(0x%08x), o_calloutMade:%u", + getHuid(i_mba), o_calloutMade ); + + return o_rc; + + #undef PRDF_FUNC +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk index a6d44d64c..97318352d 100644 --- a/src/usr/diag/prdf/prdf_hb_only.mk +++ b/src/usr/diag/prdf/prdf_hb_only.mk @@ -33,6 +33,7 @@ PRDF_RULE_PLUGINS_PEGASUS_HB = \ ################################################################################ prd_pegasus_specific_HB = \ + prdfCenMbaIplCeStats.o \ prdfDramRepairs.o \ prdfPlatCalloutUtil.o |