diff options
Diffstat (limited to 'src/usr/diag/prdf/plat/pegasus')
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H | 23 | ||||
-rwxr-xr-x | src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C | 506 | ||||
-rwxr-xr-x | src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H | 230 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C | 492 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H | 49 | ||||
-rwxr-xr-x | src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H | 54 |
6 files changed, 1280 insertions, 74 deletions
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H index c6dcf5ccc..2c2ba1237 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H @@ -31,6 +31,7 @@ #include <prdfCenMbaDataBundle_common.H> #include <diag/mdia/mdia.H> +#include <prdfCenMbaIplCeStats.H> //------------------------------------------------------------------------------ @@ -50,13 +51,27 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon */ explicit CenMbaDataBundle( ExtensibleChip * i_mbaChip ) : CenMbaDataBundleCommon(i_mbaChip), iv_sendCmdCompleteMsg(false), - iv_cmdCompleteMsgData() + iv_cmdCompleteMsgData(), iv_iplCeStats(NULL) {} /** * @brief Destructor. */ - ~CenMbaDataBundle() {} + ~CenMbaDataBundle() + { + delete iv_iplCeStats; iv_iplCeStats = NULL; + } + + /** @return The IPL CE statistics object. */ + CenMbaIplCeStats * getIplCeStats() + { + if ( NULL == iv_iplCeStats ) + { + iv_iplCeStats = new CenMbaIplCeStats( iv_mbaChip ); + } + + return iv_iplCeStats; + } private: // functions @@ -74,6 +89,10 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon */ MDIA::MaintCommandEventType iv_cmdCompleteMsgData; + private: // instance variables + + CenMbaIplCeStats * iv_iplCeStats; ///< MNFG IPL CE statistics object + }; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C new file mode 100755 index 000000000..1b3541865 --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C @@ -0,0 +1,506 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 
2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +/** @file prdfCenMbaIplCeStats.C + * @brief Contains IPL CE related code. + */ + +// Framework includes +#include <iipServiceDataCollector.h> +#include <prdfEnums.H> +#include <prdfErrlUtil.H> +#include <prdfExtensibleChip.H> +#include <prdfGlobal.H> +#include <prdfPfa5Data.h> +#include <prdf_service_codes.H> + +// Pegasus includes +#include <prdfCenMbaExtraSig.H> +#include <prdfCenMbaIplCeStats.H> +#include <prdfCenMbaThresholds.H> +#include <prdfCenMemUtils.H> +#include <prdfMemoryMru.H> +#include <prdfPlatCalloutUtil.H> + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; +using namespace HWAS; + +//------------------------------------------------------------------------------ + +void CenMbaIplCeStats::banAnalysis( const CenRank & i_rank ) +{ + + for ( uint8_t i = 0; i < MAX_PORT_PER_MBA; i++ ) + { + HalfRankKey banKey = { i_rank, i }; + iv_bannedAnalysis[banKey] = true; + } +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::banAnalysis( const CenRank & i_rank, + uint8_t i_portSlct ) +{ + int32_t o_rc = SUCCESS; + + do + { + if ( i_portSlct >= MAX_PORT_PER_MBA ) + { + PRDF_ERR("[banAnalysis] i_portSlct (0x%02x) is invalid", + i_portSlct ); + o_rc = FAIL; + break; + } + + HalfRankKey banKey = { i_rank, i_portSlct }; + iv_bannedAnalysis[banKey] = true; + + } while (0); + + return o_rc; +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::collectStats( const CenRank & i_stopRank ) +{ + #define 
PRDF_FUNC "[CenMbaIplCeStats::collectStats] " + int32_t o_rc = SUCCESS; + do + { + MemUtils::MaintSymbols symData; + o_rc = MemUtils::collectCeStats( iv_mbaChip, symData, i_stopRank); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", + getHuid( iv_mbaChip->GetChipHandle() ) ); + break; + } + + // if size of stats collected is zero, it may mean some symbol + // has gone beyond maximum value. But this is only valid for DD1 + // and has a very low probability. So ignoring this case. + + for ( uint32_t i = 0; i < symData.size(); i++ ) + { + uint8_t dimmSlct = i_stopRank.getDimmSlct(); + uint8_t dram = symData[i].symbol.getDram(); + uint8_t portSlct = symData[i].symbol.getPortSlct(); + + // Check if analysis is banned. + HalfRankKey banKey = { i_stopRank, portSlct }; + if ( iv_bannedAnalysis[banKey] ) + continue; + + // Update iv_ceSymbols with the new symbol data. + SymbolKey symkey = { symData[i].symbol }; + iv_ceSymbols.push_back (symkey ); + + // Increment the soft CEs per DRAM. + DramKey dramKey = { i_stopRank, dram, portSlct }; + iv_dramMap[dramKey]++; + + // Increment the soft CEs per half rank. + HalfRankKey rankKey = { i_stopRank, portSlct }; + iv_rankMap[rankKey]++; + + // In case of dimm Slct , rank select does not matter + CenRank dimmRank( dimmSlct, 0); + // Increment the soft CEs per half dimm select. + HalfRankKey dsKey = { dimmRank, portSlct }; + iv_dsMap[dsKey]++; + } + + } while (0); + + // We have to clear all stats before giving control back to MDIA.. + // This is done by setting up MBSTRQ[53] bit + // We are doing cleanup in TdController code, + // So not clearing up stats here. 
+ return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::analyzeStats( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "CenMbaIplCeStats::analyzeStats " + int32_t o_rc = SUCCESS; + + o_callOutsMade = false; + + do + { + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + o_rc = calloutCePerDram( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC" calloutCePerDram() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + o_rc = calloutCePerRank( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"calloutCePerRank() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + o_rc = calloutCePerDs( o_callOutsMade ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC" calloutCePerDs() failed. MBA:0X%08X", + getHuid( mbaTrgt ) ); + break; + } + + } while (0); + + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutHardCes( const CenRank & i_stopRank ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutHardCes] " + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + int32_t o_rc = SUCCESS; + do + { + MemUtils::MaintSymbols symData; + o_rc = MemUtils::collectCeStats( iv_mbaChip, symData, i_stopRank); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", + getHuid( iv_mbaChip->GetChipHandle() ) ); + break; + } + + for ( uint32_t i = 0; i < symData.size(); i++ ) + { + uint8_t portSlct = symData[i].symbol.getPortSlct(); + + // Check if analysis is banned. + HalfRankKey banKey = { i_stopRank, portSlct }; + + bool& isBanned = iv_bannedAnalysis[banKey]; + + if ( isBanned ) + continue; + + // At this point a hard CE was found, callout the symbol. + MemoryMru memMru ( mbaTrgt, symData[i].symbol.getRank(), + symData[i].symbol ); + + // We are creating and committing error log here. 
It is different + // from rest of attention flow. We could have set the callout + // values in sdc but it would have created confusion in ffdc if + // we also get vcm/ue at same time. + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplHardCE, 0); + addMruAndCommitErrl( memMru, l_errl); + + // Ban the half rank. + isBanned = true; + } + }while(0); + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerDram( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDram] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerDramMap::iterator dramIter = iv_dramMap.begin(); + dramIter != iv_dramMap.end(); dramIter++ ) + { + // First, check if this half rank is banned from analysis. + HalfRankKey banKey = { dramIter->first.rank, + dramIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // an entry if one does not exist, so use find() instead to check + // for existence in the map. + if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per DRAM threshold. + uint16_t dramTh, junk0, junk1; + o_rc = getMnfgMemCeTh( iv_mbaChip, dramIter->first.rank, dramTh, + junk0, junk1 ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_dramMap. + if ( dramIter->second <= dramTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this dram. 
+ for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (dramIter->first.rank == symIter->symbol.getRank() ) && + (dramIter->first.dram == symIter->symbol.getDram() ) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplDramCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + + // Ban the half rank. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurrence. + break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerRank( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerRank] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerHalfRankMap::iterator rankIter = iv_rankMap.begin(); + rankIter != iv_rankMap.end(); rankIter++ ) + { + // First, check if this half rank is banned from analysis. + HalfRankKey banKey = { rankIter->first.rank, + rankIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // an entry if one does not exist, so use find() instead to check + // for existence in the map. + if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per rank threshold. + uint16_t junk0, rankTh, junk1; + o_rc = getMnfgMemCeTh( iv_mbaChip, rankIter->first.rank, junk0, + rankTh, junk1 ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. 
MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_rankMap. + if ( rankIter->second <= rankTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this rank. + for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (rankIter->first.rank == symIter->symbol.getRank() ) && + (rankIter->first.portSlct == + symIter->symbol.getPortSlct()) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid( mbaTrgt ), + 0, PRDFSIG_MnfgIplRankCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + // Ban the half rank. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurrence. + break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaIplCeStats::calloutCePerDs( bool & o_callOutsMade ) +{ + #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDs] " + int32_t o_rc = SUCCESS; + + TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); + + for ( CePerHalfDsMap::iterator dsIter = iv_dsMap.begin(); + dsIter != iv_dsMap.end(); dsIter++ ) + { + // First, check if this half dimm select is banned from analysis. + HalfRankKey banKey = { dsIter->first.rank, + dsIter->first.portSlct }; + + // Check if the rank has already been banned. Note that [] will create + // an entry if one does not exist, so use find() instead to check + // for existence in the map. 
+ if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) + continue; + + // Get the CEs per dimm select threshold. + uint16_t junk0, junk1, dsTh; + o_rc = getMnfgMemCeTh( iv_mbaChip, dsIter->first.rank, junk0, + junk1, dsTh ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", + getHuid( mbaTrgt ) ); + break; + } + + // Now, check if a threshold has been reached. If not, continue to the + // next entry in iv_dsMap. + if ( dsIter->second <= dsTh ) + continue; + + // At this point a threshold has been reached. Callout a single symbol + // found in this dimm select. + for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); + symIter != iv_ceSymbols.end(); symIter++ ) + { + if ( (dsIter->first.rank.getDimmSlct() == + symIter->symbol.getRank().getDimmSlct() ) + && (dsIter->first.portSlct == + symIter->symbol.getPortSlct()) ) + { + MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , + symIter->symbol ); + + errlHndl_t l_errl = NULL; + PRDF_CREATE_ERRL( l_errl, + ERRL_SEV_PREDICTIVE, + ERRL_ETYPE_NOT_APPLICABLE, + SRCI_ERR_INFO, + SRCI_NO_ATTR, + PRDF_MNFG_IPL_CE_ANALYSIS, + LIC_REFCODE, + PRDF_DETECTED_FAIL_HARDWARE, + getHuid(mbaTrgt), + 0, PRDFSIG_MnfgIplDsCTE, 0); + + addMruAndCommitErrl( memMru, l_errl); + // Ban the half dimm select. + iv_bannedAnalysis[banKey] = true; + o_callOutsMade = true; + + // Only one symbol needs to be called out, so exit on first + // occurrence. 
+ break; + } + } + } + return o_rc; + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +void CenMbaIplCeStats::addMruAndCommitErrl( const MemoryMru & i_memmru, + errlHndl_t i_errl ) +{ + // Add MemoryMru callouts and FFDC + CalloutUtil::calloutMemoryMru( i_errl, i_memmru, + SRCI_PRIORITY_HIGH, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_Predictive ); + + // Add traces + i_errl->collectTrace( PRDF_COMP_NAME, 512 ); + + // Commit the error log + ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID ); +} + +} // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H new file mode 100755 index 000000000..af358dd6b --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H @@ -0,0 +1,230 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. 
*/ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef PRDF_CEN_MBA_IPL_CE_STATS_H +#define PRDF_CEN_MBA_IPL_CE_STATS_H + +/** @file prdfCenMbaIplCeStats.H */ + +//------------------------------------------------------------------------------ +#include <prdfExtensibleChip.H> +#include <prdfCenAddress.H> +#include <prdfCenSymbol.H> +#include <prdfCenAddress.H> +#include <map> + +namespace PRDF +{ +class ExtensibleChip; +class MemoryMru; +//------------------------------------------------------------------------------ + +/** + * This class is used for storing the CE statistics that are gathered during a + * manufacturing mode IPL for MDIA analysis. Only one instance of this object is + * meant to be used for each MBA and is stored in its data bundle. + * It is expected that when the IPL memory diagnostics is complete, MDIA will + * call the appropriate function to tell this object to analyze all statistics + * that were collected during the IPL. + */ +class CenMbaIplCeStats +{ + public: + + /** + * @brief Constructor + * @param i_mbaChip The MBA chip. + */ + explicit CenMbaIplCeStats( ExtensibleChip * i_mbaChip ) + :iv_mbaChip(i_mbaChip) {} + + /** + * @brief Destructor + */ + ~CenMbaIplCeStats() {} + + /** + * @brief Bans analysis of the given rank. + * @param i_rank The rank to ban. + */ + void banAnalysis( const CenRank & i_rank ); + + /** + * @brief Bans analysis of the given half rank. + * @param i_rank The rank. + * @param i_portSlct The port select. + * @return Non-SUCCESS if the parameters are invalid, SUCCESS otherwise. + */ + int32_t banAnalysis( const CenRank & i_rank, uint8_t i_portSlct ); + + /** + * @brief Will collect all the maintenance statistics and store them for + * analysis. Will also clear the scrub statistics counters when + * collection is complete. + * @param i_rank The rank the maintenance command stopped on. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
+ */ + int32_t collectStats( const CenRank & i_rank ); + + /** + * @brief Analyzes the maintenance statistics that were gathered, making + * the appropriate callouts. + * @param o_callOutsDone TRUE if PRD made a hardware callout, FALSE + * otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t analyzeStats ( bool & o_callOutsDone ); + + /** @brief Calls out all symbols with hard CEs. + * @param i_stopRank The rank the maintenance command stopped on. + */ + int32_t calloutHardCes( const CenRank & i_stopRank ); + + private: // enums, structs, typedefs + + /** @brief The key type for each entry in iv_symMap. */ + struct SymbolKey + { + CenSymbol symbol; ///< The failing symbol + + /** @brief Overrides the '==' operator. */ + bool operator==( const SymbolKey & i ) const + { return ( symbol == i.symbol ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const SymbolKey & i ) const + { return (symbol < i.symbol ); } + }; + + + /** @brief The key to identify half rank. + * This key will be used to ban analysis, rank and dimm + * threshold analysis + */ + struct HalfRankKey + { + CenRank rank; ///< The rank + uint8_t portSlct; ///< The port select + + /** @brief Overrides the '==' operator. */ + bool operator==( const HalfRankKey & i ) const + { return ( (rank == i.rank) && (portSlct == i.portSlct) ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const HalfRankKey & i ) const + { + return ( (rank < i.rank) || + ((rank == i.rank) && (portSlct < i.portSlct)) ); + } + }; + + /** @brief The key type for each entry in iv_dramMap. */ + struct DramKey + { + CenRank rank; ///< The rank + uint8_t dram; ///< The DRAM (x8:0-17 x4:0-35) + uint8_t portSlct; ///< The port select (0-1) + + // Techinally, the port select can be derived from the DRAM value, + // however, it simplifies things to just store the port select here. 
+ // Therefore, the port select does not need to be used in operator==() + // or operator<(). + + /** @brief Overrides the '==' operator. */ + bool operator==( const DramKey & i ) const + { return ( (rank == i.rank) && (dram == i.dram) ); } + + /** @brief Overrides the '<' operator. */ + bool operator<( const DramKey & i ) const + { return ( (rank < i.rank) || ((rank == i.rank) && (dram < i.dram)) ); } + }; + + // data type to collect all symbol statistics + typedef std::vector<SymbolKey> CESymbols; + + // data type to collect dimm specific statistics. While + // filling up data for this data type, we should ignore rank select + // and only consider dimm slct. + typedef std::map<HalfRankKey, uint32_t> CePerHalfDsMap; + // data type to collect all symbol statistics for a rank. + typedef std::map<HalfRankKey, uint32_t> CePerHalfRankMap; + // data type to store banned half ranks on which analysis is not required. + typedef std::map<HalfRankKey, bool> BannedAnalysisMap; + // data type to collect all symbol statistics for a dram. + typedef std::map<DramKey, uint32_t> CePerDramMap; + + private: // functions + + /** @brief Calls out all symbols on a dram that has exceeded threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t calloutCePerDram( bool & o_callOutsMade ); + + /** @brief Calls out all symbols on a rank that has exceeded threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t calloutCePerRank( bool & o_callOutsMade ); + + /** @brief Calls out all symbols on a dimm select that has exceeded + * threshold. + * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
+ */ + int32_t calloutCePerDs( bool & o_callOutsMade ); + + /** + * @brief Add MemoryMru callout to error log and commit it. + * @param i_memmru Memory MRU. + * @param i_errl Error log. + */ + void addMruAndCommitErrl( const MemoryMru & i_memmru, errlHndl_t i_errl ); + + private: // instance variables + + /** The MBA chip. */ + ExtensibleChip * iv_mbaChip; + + /** The rank that this object is expecting to collect for. */ + CenRank iv_rankToAnalyze; + + /** A map to keep track of which half ranks have all ready been called out. + * This helps reduce excessive callouts for the same hardware. */ + BannedAnalysisMap iv_bannedAnalysis; + + /** A vector containing all data for every failing symbol. */ + CESymbols iv_ceSymbols; + + /** A map containing count for every failing dimm select. */ + CePerHalfDsMap iv_dsMap; + + /** A map containing count for every failing rank. */ + CePerHalfRankMap iv_rankMap; + + /** A map containing count for every failing DRAM. */ + CePerDramMap iv_dramMap; +}; + +} //end namespace PRDF +#endif /* PRDF_CEN_MBA_IPL_CE_STATS_H */ diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C index e329f15df..aada91165 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C @@ -52,24 +52,30 @@ using namespace PlatServices; enum EccErrorMask { - NO_ERROR = 0, ///< No ECC errors found - UE = 0x80, ///< UE - MPE = 0x40, ///< Chip mark placed - RCE = 0x20, ///< Retry CE - MCE = 0x10, ///< CE on chip mark + NO_ERROR = 0, ///< No ECC errors found + UE = 0x01, ///< UE + MPE = 0x02, ///< Chip mark placed + MCE = 0x04, ///< CE on chip mark + HARD_CTE = 0x08, ///< Hard CE threshold exceeed + SOFT_CTE = 0x10, ///< Soft CE threshold exceeed + INTER_CTE = 0x20, ///< Intermittent CE threshold exceeed + RETRY_CTE = 0x40, ///< Retry CE threshold exceeed }; //------------------------------------------------------------------------------ // Class 
Variables //------------------------------------------------------------------------------ -CenMbaTdCtlr::CMD_COMPLETE_FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = +CenMbaTdCtlr::FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = { &CenMbaTdCtlr::analyzeCmdComplete, // NO_OP &CenMbaTdCtlr::analyzeVcmPhase1, // VCM_PHASE_1 &CenMbaTdCtlr::analyzeVcmPhase2, // VCM_PHASE_2 &CenMbaTdCtlr::analyzeDsdPhase1, // DSD_PHASE_1 &CenMbaTdCtlr::analyzeDsdPhase2, // DSD_PHASE_2 + &CenMbaTdCtlr::analyzeTpsPhase1, // TPS_PHASE_1 + &CenMbaTdCtlr::analyzeTpsPhase2, // TPS_PHASE_2 + NULL, // RANK_SCRUB }; //------------------------------------------------------------------------------ @@ -109,6 +115,13 @@ int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) break; } + if ( NULL == cv_cmdCompleteFuncs[iv_tdState] ) + { + PRDF_ERR( PRDF_FUNC"Function for state %d not supported", + iv_tdState ); + o_rc = FAIL; break; + } + o_rc = (this->*cv_cmdCompleteFuncs[iv_tdState])( io_sc ); if ( SUCCESS != o_rc ) { @@ -206,8 +219,6 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) int32_t o_rc = SUCCESS; - TargetHandle_t mba = iv_mbaChip->GetChipHandle(); - do { if ( NO_OP != iv_tdState ) @@ -227,7 +238,7 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) iv_rank = CenRank( addr.getRank() ); // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -247,30 +258,28 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) } else if ( eccErrorMask & MPE ) { - // Get the current marks in hardware. 
- o_rc = mssGetMarkStore( mba, iv_rank, iv_mark ); + o_rc = handleMPE( io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed"); + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); break; } - - if ( !iv_mark.getCM().isValid() ) - { - PRDF_ERR( PRDF_FUNC"No valid chip mark to verify"); - o_rc = FAIL; break; - } - - io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 ); - - CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); - - // Start VCM procedure - o_rc = startVcmPhase1(); - if ( SUCCESS != o_rc ) + } + else if ( isMfgCeCheckingEnabled() ) + { + // During MNFG IPL CE, we will get this condition. + // During SF read, all CE are reported as Hard CE. + // So we will only check for Hard CE threshold. + if ( eccErrorMask & HARD_CTE ) { - PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" ); - break; + io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase1 ); + // Start TPS Phase 1 + o_rc = startTpsPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startTpsPhase1() failed" ); + break; + } } } else @@ -305,7 +314,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -313,7 +322,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) break; } - if ( (eccErrorMask & UE) || (eccErrorMask & RCE) ) + if ( (eccErrorMask & UE) || (eccErrorMask & RETRY_CTE) ) { // Handle UE. 
Highest priority o_rc = handleUE( io_sc ); @@ -364,7 +373,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -448,7 +457,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc) { @@ -456,7 +465,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) break; } - if ( ( eccErrorMask & UE) || ( eccErrorMask & RCE ) ) + if ( ( eccErrorMask & UE) || ( eccErrorMask & RETRY_CTE ) ) { // Handle UE. Highest priority o_rc = handleUE( io_sc ); @@ -473,7 +482,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); // Start DSD Phase 2 - startDsdPhase2(); + o_rc = startDsdPhase2(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"startDsdPhase2() failed" ); @@ -507,7 +516,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) } // Get error condition which caused command to stop - uint8_t eccErrorMask = NO_ERROR; + uint16_t eccErrorMask = NO_ERROR; o_rc = checkEccErrors( eccErrorMask ); if ( SUCCESS != o_rc ) { @@ -567,6 +576,144 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +int32_t CenMbaTdCtlr::analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase1] " + + int32_t o_rc = SUCCESS; + + do + { + if ( TPS_PHASE_1 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + + o_rc 
= mbadb->getIplCeStats()->collectStats( iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"collectStats() failed"); + break; + } + + // Get error condition which caused command to stop + uint16_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( ( eccErrorMask & UE ) || ( eccErrorMask & RETRY_CTE )) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MPE ) + { + o_rc = handleMPE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); + break; + } + } + else + { + // Start TPS Phase 2 + io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase2 ); + o_rc = startTpsPhase2(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startTpsPhase2() failed" ); + break; + } + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase2] " + + int32_t o_rc = SUCCESS; + + do + { + if ( TPS_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + + o_rc = mbadb->getIplCeStats()->calloutHardCes( iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"calloutHardCes() failed"); + break; + } + + // Get error condition which caused command to stop + uint16_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( ( eccErrorMask & UE ) || ( eccErrorMask & RETRY_CTE )) + { + // Handle UE. 
Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MPE ) + { + o_rc = handleMPE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMPE() failed"); + break; + } + } + else + { + io_sc.service_data->SetErrorSig( PRDFSIG_EndTpsPhase2 ); + iv_tdState = NO_OP; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + int32_t CenMbaTdCtlr::startVcmPhase1() { #define PRDF_FUNC "[CenMbaTdCtlr::startVcmPhase1] " @@ -587,8 +734,7 @@ int32_t CenMbaTdCtlr::startVcmPhase1() } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, @@ -635,8 +781,7 @@ int32_t CenMbaTdCtlr::startVcmPhase2() } // Start phase 2. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, @@ -691,8 +836,7 @@ int32_t CenMbaTdCtlr::startDsdPhase1() } // Start phase 1. - uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, @@ -739,8 +883,7 @@ int32_t CenMbaTdCtlr::startDsdPhase2() } // Start phase 2. 
- uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | - mss_MaintCmd::STOP_ON_END_ADDRESS | + uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, @@ -767,14 +910,119 @@ int32_t CenMbaTdCtlr::startDsdPhase2() //------------------------------------------------------------------------------ -bool CenMbaTdCtlr::isInTdMode() +int32_t CenMbaTdCtlr::startTpsPhase1() { - return ( (NO_OP != iv_tdState) && (MAX_TD_STATE > iv_tdState) ); + #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase1] " + + int32_t o_rc = SUCCESS; + + iv_tdState = TPS_PHASE_1; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // We are using current state as input parameter in mnfgCeSetup. + // So it is mandatory to set iv_tdState before calling this function. + o_rc = mnfgCeSetup(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" ); + break; + } + + // Start phase 1. 
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC } //------------------------------------------------------------------------------ -int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask ) +int32_t CenMbaTdCtlr::startTpsPhase2() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase2] " + + int32_t o_rc = SUCCESS; + + iv_tdState = TPS_PHASE_2; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // We are using current state as input parameter in mnfgCeSetup. + // So it is mandatory to set iv_tdState before calling this function. + o_rc = mnfgCeSetup(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" ); + break; + } + + // Start phase 2. 
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::checkEccErrors( uint16_t & o_eccErrorMask ) { #define PRDF_FUNC "[CenMbaTdCtlr::checkEccErrors] " @@ -821,7 +1069,20 @@ int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask ) if ( mbsEccFir->IsBitSet(38) ) o_eccErrorMask |= MCE; if ( mbsEccFir->IsBitSet(41) ) o_eccErrorMask |= UE; - if ( mbsEccFir->IsBitSet(42) ) o_eccErrorMask |= RCE; + + SCAN_COMM_REGISTER_CLASS * mbaSpaFir = + iv_mbaChip->getRegister("MBASPA"); + o_rc = mbaSpaFir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Failed to read MBASPA Regsiter"); + break; + } + + if ( mbaSpaFir->IsBitSet(1) ) o_eccErrorMask |= HARD_CTE; + if ( mbaSpaFir->IsBitSet(2) ) o_eccErrorMask |= SOFT_CTE; + if ( mbaSpaFir->IsBitSet(3) ) o_eccErrorMask |= INTER_CTE; + if ( mbaSpaFir->IsBitSet(4) ) o_eccErrorMask |= RETRY_CTE; } while(0); @@ -846,6 +1107,7 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) io_sc.service_data->SetServiceCall(); TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); do { @@ -892,6 +1154,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) } callouts.insert( callouts.end(), dimms.begin(), dimms.end() ); + + if ( isMfgCeCheckingEnabled() ) + { + // As we are doing callout for UE, we dont need to do callout + // during CE for this rank on given port + mbadb->getIplCeStats()->banAnalysis( 
iv_rank, ps ); + } } if ( SUCCESS != o_rc ) break; @@ -909,6 +1178,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" ); o_rc = FAIL; break; } + + if ( isMfgCeCheckingEnabled() ) + { + // As we are doing callout for UE, we dont need to do callout + // during CE for this rank on both port + mbadb->getIplCeStats()->banAnalysis( iv_rank); + } } // Callout all DIMMs in the list. @@ -927,6 +1203,50 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) //------------------------------------------------------------------------------ +int32_t CenMbaTdCtlr::handleMPE( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleMPE] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + // Get the current marks in hardware. + o_rc = mssGetMarkStore( mba, iv_rank, iv_mark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed"); + break; + } + + if ( !iv_mark.getCM().isValid() ) + { + PRDF_ERR( PRDF_FUNC"No valid chip mark to verify"); + o_rc = FAIL; break; + } + + io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Start VCM procedure + o_rc = startVcmPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} +//------------------------------------------------------------------------------ + int32_t CenMbaTdCtlr::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[CenMbaTdCtlr::handleMCE_VCM2] " @@ -1285,6 +1605,20 @@ int32_t CenMbaTdCtlr::prepareNextCmd() break; } + SCAN_COMM_REGISTER_CLASS * spaAnd = + iv_mbaChip->getRegister("MBASPA_AND"); + spaAnd->setAllBits(); + + // clear threshold exceeded attentions + spaAnd->SetBitFieldJustified( 1, 4, 0 ); + + o_rc = spaAnd->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( 
PRDF_FUNC"Write() failed on MBASPA_AND" ); + o_rc = FAIL; break; + } + } while (0); return o_rc; @@ -1338,5 +1672,69 @@ int32_t CenMbaTdCtlr::signalMdiaCmdComplete() #undef PRDF_FUNC } +// Do the setup for mnfg IPL CE +int32_t CenMbaTdCtlr::mnfgCeSetup() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::mnfgCeSetup] " + + int32_t o_rc = SUCCESS; + + do + { + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + ExtensibleChip * membChip = mbadb->getMembChip(); + if ( NULL == membChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); + o_rc = FAIL; break; + } + + uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() ); + + const char * reg_str = ( 0 == mbaPos ) ? "MBA0_MBSTR" : "MBA1_MBSTR"; + SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str ); + o_rc = mbstr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str ); + break; + } + + if ( TPS_PHASE_1 == iv_tdState ) + { + // Enable per-symbol error counters to count soft CEs + mbstr->SetBit(55); + mbstr->SetBit(56); + // Disable per-symbol error counters to count hard CEs + mbstr->ClearBit(57); + } + else if ( TPS_PHASE_2 == iv_tdState ) + { + // Disable per-symbol error counters to count soft CEs + mbstr->ClearBit(55); + mbstr->ClearBit(56); + // Enable per-symbol error counters to count hard CEs + mbstr->SetBit(57); + } + else + { + PRDF_ERR( PRDF_FUNC"Inavlid State:%u", iv_tdState ); + o_rc = FAIL; break; + } + + o_rc = mbstr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H index 807211e30..fd4ffd015 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H @@ -40,24 +40,8 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon { private: // 
constants, enums - /** - * @brief Lists all possible states of TD controller - * @note These enums are used as array indexes to cv_cmdCompleteFuncs and - * the last entry will be used to get the size of the array. - */ - enum TdState - { - NO_OP = 0, ///< No TD procedures in place. - VCM_PHASE_1, ///< Verify Chip Mark phase 1. - VCM_PHASE_2, ///< Verify Chip Mark phase 2. - DSD_PHASE_1, ///< DRAM Spare Deploy phase 1. - DSD_PHASE_2, ///< DRAM Spare Deploy phase 2. - MAX_TD_STATE ///< The maximum number of TD states. - }; - // Function pointers for maintenance command complete events. - typedef int32_t (CenMbaTdCtlr::*CMD_COMPLETE_FUNCS) - ( STEP_CODE_DATA_STRUCT & io_sc ); + typedef int32_t (CenMbaTdCtlr::*FUNCS)( STEP_CODE_DATA_STRUCT & io_sc ); public: // functions @@ -72,7 +56,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon * @param i_mbaChip An MBA chip. */ explicit CenMbaTdCtlr( ExtensibleChip * i_mbaChip ) : - CenMbaTdCtlrCommon(i_mbaChip), iv_tdState(NO_OP) + CenMbaTdCtlrCommon(i_mbaChip) {} public: // Overloaded functions @@ -88,13 +72,15 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ); int32_t analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ); int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ); + int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc ); + int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc ); int32_t startVcmPhase1(); int32_t startVcmPhase2(); int32_t startDsdPhase1(); int32_t startDsdPhase2(); - - bool isInTdMode(); + int32_t startTpsPhase1(); + int32_t startTpsPhase2(); private: // functions @@ -104,7 +90,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon * occurred. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ - int32_t checkEccErrors( uint8_t & o_eccErrorMask ); + int32_t checkEccErrors( uint16_t & o_eccErrorMask ); /** * @brief Handle UEs during TD analysis. 
@@ -114,6 +100,14 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon int32_t handleUE( STEP_CODE_DATA_STRUCT & io_sc ); /** + * @brief Handle MPE event + * @param io_sc Service data collector. + * @note This will start VCM phase 1. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t handleMPE( STEP_CODE_DATA_STRUCT & io_sc ); + + /** * @brief Handle MCE event during VCM Phase 2 * @param io_sc Service data collector. * @note This will update bad bits information in VPD, set callouts, and @@ -155,16 +149,21 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon */ int32_t signalMdiaCmdComplete(); - private: // instance variables + /** + * @brief Does mnfg setup for CE threshold. + * @note Before calling this function, set current state to new + * value (TPS_PHASE_1/ TPS_PHASE_2). + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t mnfgCeSetup(); - /** The targeted diagnostics state variable (see enum TdState). */ - TdState iv_tdState; + private: // instance variables /** Array of functions pointers for TD controller states. This is used to * determine the next course of action after a maintenance command complete * attention. */ - static CMD_COMPLETE_FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE]; + static FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE]; }; // CenMbaTdCtlr diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H new file mode 100755 index 000000000..45fdd3282 --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H @@ -0,0 +1,54 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 
2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __PRDF_CEN_MBA_THRESHOLDS_H +#define __PRDF_CEN_MBA_THRESHOLDS_H + +/** @file prdfCenMbaThresholds.H + * @brief Utility functions used to get specific Centaur thresholds + * during IPL time. + */ + +#include <prdfCenMbaThresholds_common.H> +#include <prdfMfgThresholdMgr.H> +#include <prdfMfgThresholds.H> + +namespace PRDF +{ + +class ExtensibleChip; + +/** + * @brief Returns the manufacturing memory CE thresholds Per 2GB ( base ). + */ +inline uint8_t getMnfgCeTh() +{ + return MfgThresholdMgr::getInstance()-> + getThreshold( PRDF_CEN_MBA_IPL_SOFT_CE_TH_ALGO ); + +} + +} // end namespace PRDF + +#endif /* __PRDF_CEN_MBA_THRESHOLDS_H */ + |