summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf/plat/pegasus
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/diag/prdf/plat/pegasus')
-rw-r--r--src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H23
-rwxr-xr-xsrc/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C506
-rwxr-xr-xsrc/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H230
-rw-r--r--src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C492
-rw-r--r--src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H49
-rwxr-xr-xsrc/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H54
6 files changed, 1280 insertions, 74 deletions
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H
index c6dcf5ccc..2c2ba1237 100644
--- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaDataBundle.H
@@ -31,6 +31,7 @@
#include <prdfCenMbaDataBundle_common.H>
#include <diag/mdia/mdia.H>
+#include <prdfCenMbaIplCeStats.H>
//------------------------------------------------------------------------------
@@ -50,13 +51,27 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon
*/
explicit CenMbaDataBundle( ExtensibleChip * i_mbaChip ) :
CenMbaDataBundleCommon(i_mbaChip), iv_sendCmdCompleteMsg(false),
- iv_cmdCompleteMsgData()
+ iv_cmdCompleteMsgData(), iv_iplCeStats(NULL)
{}
/**
* @brief Destructor.
*/
- ~CenMbaDataBundle() {}
+ ~CenMbaDataBundle()
+ {
+ delete iv_iplCeStats; iv_iplCeStats = NULL;
+ }
+
+    /**
+     * @brief  Provides access to the IPL CE statistics object, which is
+     *         allocated the first time it is requested.
+     * @return The IPL CE statistics object.
+     */
+    CenMbaIplCeStats * getIplCeStats()
+    {
+        // Hand back the existing object if one has already been created.
+        if ( NULL != iv_iplCeStats ) return iv_iplCeStats;
+
+        iv_iplCeStats = new CenMbaIplCeStats( iv_mbaChip );
+        return iv_iplCeStats;
+    }
private: // functions
@@ -74,6 +89,10 @@ class CenMbaDataBundle : public CenMbaDataBundleCommon
*/
MDIA::MaintCommandEventType iv_cmdCompleteMsgData;
+ private: // instance variables
+
+ CenMbaIplCeStats * iv_iplCeStats; ///< MNFG IPL CE statistics object
+
};
//------------------------------------------------------------------------------
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C
new file mode 100755
index 000000000..1b3541865
--- /dev/null
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C
@@ -0,0 +1,506 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+/** @file prdfCenMbaIplCeStats.C
+ * @brief Contains IPL CE related code.
+ */
+
+// Framework includes
+#include <iipServiceDataCollector.h>
+#include <prdfEnums.H>
+#include <prdfErrlUtil.H>
+#include <prdfExtensibleChip.H>
+#include <prdfGlobal.H>
+#include <prdfPfa5Data.h>
+#include <prdf_service_codes.H>
+
+// Pegasus includes
+#include <prdfCenMbaExtraSig.H>
+#include <prdfCenMbaIplCeStats.H>
+#include <prdfCenMbaThresholds.H>
+#include <prdfCenMemUtils.H>
+#include <prdfMemoryMru.H>
+#include <prdfPlatCalloutUtil.H>
+
+using namespace TARGETING;
+
+namespace PRDF
+{
+
+using namespace PlatServices;
+using namespace HWAS;
+
+//------------------------------------------------------------------------------
+
+// Marks every half rank of the given rank (one per port select, up to
+// MAX_PORT_PER_MBA) as banned from further analysis.
+void CenMbaIplCeStats::banAnalysis( const CenRank & i_rank )
+{
+    uint8_t port = 0;
+    while ( port < MAX_PORT_PER_MBA )
+    {
+        HalfRankKey key = { i_rank, port };
+        iv_bannedAnalysis[key] = true;
+        ++port;
+    }
+}
+
+//------------------------------------------------------------------------------
+
+// Marks a single half rank (rank + port select) as banned from further
+// analysis. Returns FAIL, without touching the map, if the port select is not
+// below MAX_PORT_PER_MBA; returns SUCCESS otherwise.
+int32_t CenMbaIplCeStats::banAnalysis( const CenRank & i_rank,
+                                       uint8_t i_portSlct )
+{
+    // Reject port selects outside of the supported range.
+    if ( MAX_PORT_PER_MBA <= i_portSlct )
+    {
+        PRDF_ERR("[banAnalysis] i_portSlct (0x%02x) is invalid",
+                 i_portSlct );
+        return FAIL;
+    }
+
+    HalfRankKey key = { i_rank, i_portSlct };
+    iv_bannedAnalysis[key] = true;
+
+    return SUCCESS;
+}
+
+//------------------------------------------------------------------------------
+
+// Collects the maintenance CE statistics for the rank the command stopped on
+// and folds them into the per-symbol, per-DRAM, per-half-rank and
+// per-half-dimm-select counters. Symbols on a banned half rank are ignored.
+// Returns non-SUCCESS if MemUtils::collectCeStats() fails, SUCCESS otherwise.
+int32_t CenMbaIplCeStats::collectStats( const CenRank & i_stopRank )
+{
+    #define PRDF_FUNC "[CenMbaIplCeStats::collectStats] "
+    int32_t o_rc = SUCCESS;
+    do
+    {
+        MemUtils::MaintSymbols symData;
+        o_rc = MemUtils::collectCeStats( iv_mbaChip, symData, i_stopRank);
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X",
+                      getHuid( iv_mbaChip->GetChipHandle() ) );
+            break;
+        }
+
+        // if size of stats collected is zero, it may mean some symbol
+        // has gone beyond maximum value. But this is only valid for DD1
+        // and has a very low probability. So ignoring this case.
+
+        // The dimm select only depends on the stop rank, so get it once.
+        const uint8_t dimmSlct = i_stopRank.getDimmSlct();
+
+        for ( uint32_t i = 0; i < symData.size(); i++ )
+        {
+            uint8_t dram     = symData[i].symbol.getDram();
+            uint8_t portSlct = symData[i].symbol.getPortSlct();
+
+            // Check if analysis is banned. Use find() rather than [] so that
+            // a pure lookup never inserts an entry into iv_bannedAnalysis;
+            // code that tests for the presence of a key would otherwise
+            // mistake the default-inserted 'false' entry for a ban.
+            HalfRankKey banKey = { i_stopRank, portSlct };
+            BannedAnalysisMap::const_iterator banIter =
+                                            iv_bannedAnalysis.find( banKey );
+            if ( (iv_bannedAnalysis.end() != banIter) && banIter->second )
+                continue;
+
+            // Update iv_ceSymbols with the new symbol data.
+            SymbolKey symkey = { symData[i].symbol };
+            iv_ceSymbols.push_back( symkey );
+
+            // Increment the soft CEs per DRAM.
+            DramKey dramKey = { i_stopRank, dram, portSlct };
+            iv_dramMap[dramKey]++;
+
+            // Increment the soft CEs per half rank.
+            HalfRankKey rankKey = { i_stopRank, portSlct };
+            iv_rankMap[rankKey]++;
+
+            // In case of dimm select, the rank select does not matter.
+            CenRank dimmRank( dimmSlct, 0 );
+            // Increment the soft CEs per half dimm select.
+            HalfRankKey dsKey = { dimmRank, portSlct };
+            iv_dsMap[dsKey]++;
+        }
+
+    } while (0);
+
+    // We have to clear all stats before giving control back to MDIA..
+    // This is done by setting up MBSTRQ[53] bit
+    // We are doing cleanup in TdController code,
+    // So not clearing up stats here.
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// Runs the per-DRAM, per-rank and per-dimm-select threshold analysis, in that
+// order, stopping at the first step that fails. o_callOutsMade is reset to
+// false up front and is set by the individual steps when they make a callout.
+int32_t CenMbaIplCeStats::analyzeStats( bool & o_callOutsMade )
+{
+    #define PRDF_FUNC "CenMbaIplCeStats::analyzeStats "
+
+    o_callOutsMade = false;
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    // Step 1: CEs per DRAM.
+    int32_t o_rc = calloutCePerDram( o_callOutsMade );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC" calloutCePerDram() failed. MBA:0X%08X",
+                  getHuid( mbaTrgt ) );
+        return o_rc;
+    }
+
+    // Step 2: CEs per half rank.
+    o_rc = calloutCePerRank( o_callOutsMade );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"calloutCePerRank() failed. MBA:0X%08X",
+                  getHuid( mbaTrgt ) );
+        return o_rc;
+    }
+
+    // Step 3: CEs per half dimm select.
+    o_rc = calloutCePerDs( o_callOutsMade );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC" calloutCePerDs() failed. MBA:0X%08X",
+                  getHuid( mbaTrgt ) );
+    }
+
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// Collects the CE statistics for the rank the maintenance command stopped on
+// and, for each reported symbol whose half rank is not yet banned, commits a
+// predictive error log calling out that symbol and then bans the half rank.
+// Returns non-SUCCESS if MemUtils::collectCeStats() fails, SUCCESS otherwise.
+int32_t CenMbaIplCeStats::calloutHardCes( const CenRank & i_stopRank )
+{
+    #define PRDF_FUNC "[CenMbaIplCeStats::calloutHardCes] "
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    MemUtils::MaintSymbols hardCes;
+    int32_t o_rc = MemUtils::collectCeStats( iv_mbaChip, hardCes, i_stopRank );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X",
+                  getHuid( iv_mbaChip->GetChipHandle() ) );
+        return o_rc;
+    }
+
+    for ( uint32_t idx = 0; idx < hardCes.size(); idx++ )
+    {
+        uint8_t port = hardCes[idx].symbol.getPortSlct();
+
+        // Look up (creating if needed) the ban flag for this half rank. The
+        // reference stays bound to the map entry so it can be set below.
+        HalfRankKey halfRank = { i_stopRank, port };
+        bool & banned = iv_bannedAnalysis[halfRank];
+
+        if ( !banned )
+        {
+            // At this point a hard CE was found, callout the symbol.
+            MemoryMru memMru ( mbaTrgt, hardCes[idx].symbol.getRank(),
+                               hardCes[idx].symbol );
+
+            // The error log is created and committed right here, unlike the
+            // rest of the attention flow. Setting the callout in the sdc
+            // instead would confuse the FFDC if a VCM/UE is hit at the same
+            // time.
+            errlHndl_t l_errl = NULL;
+
+            PRDF_CREATE_ERRL( l_errl,
+                              ERRL_SEV_PREDICTIVE,
+                              ERRL_ETYPE_NOT_APPLICABLE,
+                              SRCI_ERR_INFO,
+                              SRCI_NO_ATTR,
+                              PRDF_MNFG_IPL_CE_ANALYSIS,
+                              LIC_REFCODE,
+                              PRDF_DETECTED_FAIL_HARDWARE,
+                              getHuid( mbaTrgt ),
+                              0, PRDFSIG_MnfgIplHardCE, 0);
+            addMruAndCommitErrl( memMru, l_errl);
+
+            // Ban the half rank so it is not called out again.
+            banned = true;
+        }
+    }
+
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// For every DRAM in iv_dramMap whose CE count exceeds the MNFG CEs-per-DRAM
+// threshold, commits a predictive error log calling out one symbol found on
+// that DRAM and bans the DRAM's half rank. Half ranks that are already banned
+// are skipped. o_callOutsMade is set to true when a callout is made (it is
+// never reset here). Returns non-SUCCESS if getMnfgMemCeTh() fails.
+int32_t CenMbaIplCeStats::calloutCePerDram( bool & o_callOutsMade )
+{
+    #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDram] "
+    int32_t o_rc = SUCCESS;
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    for ( CePerDramMap::iterator dramIter = iv_dramMap.begin();
+          dramIter != iv_dramMap.end(); ++dramIter )
+    {
+        // First, check if this half rank is banned from analysis.
+        HalfRankKey banKey = { dramIter->first.rank,
+                               dramIter->first.portSlct };
+
+        // Use find() because [] would create an entry if one does not exist.
+        // The stored value must be tested as well: an entry holding 'false'
+        // (for example one default-inserted by a [] lookup) is not a ban.
+        BannedAnalysisMap::const_iterator banIter =
+                                            iv_bannedAnalysis.find( banKey );
+        if ( (iv_bannedAnalysis.end() != banIter) && banIter->second )
+            continue;
+
+        // Get the CEs per DRAM threshold.
+        uint16_t dramTh, junk0, junk1;
+        o_rc = getMnfgMemCeTh( iv_mbaChip, dramIter->first.rank, dramTh,
+                               junk0, junk1 );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X",
+                      getHuid( mbaTrgt ) );
+            break;
+        }
+
+        // Now, check if a threshold has been reached. If not, continue to the
+        // next entry in iv_dramMap.
+        if ( dramIter->second <= dramTh )
+            continue;
+
+        // At this point a threshold has been reached. Callout a single symbol
+        // found in this dram.
+        for ( CESymbols::iterator symIter = iv_ceSymbols.begin();
+              symIter != iv_ceSymbols.end(); ++symIter )
+        {
+            if ( (dramIter->first.rank == symIter->symbol.getRank() ) &&
+                 (dramIter->first.dram == symIter->symbol.getDram() ) )
+            {
+                MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() ,
+                                   symIter->symbol );
+
+                errlHndl_t l_errl = NULL;
+
+                PRDF_CREATE_ERRL( l_errl,
+                                  ERRL_SEV_PREDICTIVE,
+                                  ERRL_ETYPE_NOT_APPLICABLE,
+                                  SRCI_ERR_INFO,
+                                  SRCI_NO_ATTR,
+                                  PRDF_MNFG_IPL_CE_ANALYSIS,
+                                  LIC_REFCODE,
+                                  PRDF_DETECTED_FAIL_HARDWARE,
+                                  getHuid( mbaTrgt ),
+                                  0, PRDFSIG_MnfgIplDramCTE, 0);
+
+                addMruAndCommitErrl( memMru, l_errl);
+
+                // Ban the half rank.
+                iv_bannedAnalysis[banKey] = true;
+                o_callOutsMade = true;
+
+                // Only one symbol needs to be called out, so exit on first
+                // occurrence.
+                break;
+            }
+        }
+    }
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// For every half rank in iv_rankMap whose CE count exceeds the MNFG
+// CEs-per-rank threshold, commits a predictive error log calling out one
+// symbol found on that half rank and bans the half rank. Half ranks that are
+// already banned are skipped. o_callOutsMade is set to true when a callout is
+// made (it is never reset here). Returns non-SUCCESS if getMnfgMemCeTh() fails.
+int32_t CenMbaIplCeStats::calloutCePerRank( bool & o_callOutsMade )
+{
+    #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerRank] "
+    int32_t o_rc = SUCCESS;
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    for ( CePerHalfRankMap::iterator rankIter = iv_rankMap.begin();
+          rankIter != iv_rankMap.end(); ++rankIter )
+    {
+        // First, check if this half rank is banned from analysis.
+        HalfRankKey banKey = { rankIter->first.rank,
+                               rankIter->first.portSlct };
+
+        // Use find() because [] would create an entry if one does not exist.
+        // The stored value must be tested as well: an entry holding 'false'
+        // (for example one default-inserted by a [] lookup) is not a ban.
+        BannedAnalysisMap::const_iterator banIter =
+                                            iv_bannedAnalysis.find( banKey );
+        if ( (iv_bannedAnalysis.end() != banIter) && banIter->second )
+            continue;
+
+        // Get the CEs per rank threshold.
+        uint16_t junk0, rankTh, junk1;
+        o_rc = getMnfgMemCeTh( iv_mbaChip, rankIter->first.rank, junk0,
+                               rankTh, junk1 );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X",
+                      getHuid( mbaTrgt ) );
+            break;
+        }
+
+        // Now, check if a threshold has been reached. If not, continue to the
+        // next entry in iv_rankMap.
+        if ( rankIter->second <= rankTh )
+            continue;
+
+        // At this point a threshold has been reached. Callout a single symbol
+        // found in this rank.
+        for ( CESymbols::iterator symIter = iv_ceSymbols.begin();
+              symIter != iv_ceSymbols.end(); ++symIter )
+        {
+            if ( (rankIter->first.rank == symIter->symbol.getRank() ) &&
+                 (rankIter->first.portSlct ==
+                                    symIter->symbol.getPortSlct()) )
+            {
+                MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() ,
+                                   symIter->symbol );
+
+                errlHndl_t l_errl = NULL;
+
+                PRDF_CREATE_ERRL( l_errl,
+                                  ERRL_SEV_PREDICTIVE,
+                                  ERRL_ETYPE_NOT_APPLICABLE,
+                                  SRCI_ERR_INFO,
+                                  SRCI_NO_ATTR,
+                                  PRDF_MNFG_IPL_CE_ANALYSIS,
+                                  LIC_REFCODE,
+                                  PRDF_DETECTED_FAIL_HARDWARE,
+                                  getHuid( mbaTrgt ),
+                                  0, PRDFSIG_MnfgIplRankCTE, 0);
+
+                addMruAndCommitErrl( memMru, l_errl);
+
+                // Ban the half rank.
+                iv_bannedAnalysis[banKey] = true;
+                o_callOutsMade = true;
+
+                // Only one symbol needs to be called out, so exit on first
+                // occurrence.
+                break;
+            }
+        }
+    }
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// For every half dimm select in iv_dsMap whose CE count exceeds the MNFG
+// CEs-per-dimm-select threshold, commits a predictive error log calling out
+// one symbol found on that half dimm select and bans it. Entries that are
+// already banned are skipped. o_callOutsMade is set to true when a callout is
+// made (it is never reset here). Returns non-SUCCESS if getMnfgMemCeTh() fails.
+int32_t CenMbaIplCeStats::calloutCePerDs( bool & o_callOutsMade )
+{
+    #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDs] "
+    int32_t o_rc = SUCCESS;
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    for ( CePerHalfDsMap::iterator dsIter = iv_dsMap.begin();
+          dsIter != iv_dsMap.end(); ++dsIter )
+    {
+        // First, check if this half dimm select is banned from analysis.
+        HalfRankKey banKey = { dsIter->first.rank,
+                               dsIter->first.portSlct };
+
+        // Use find() because [] would create an entry if one does not exist.
+        // The stored value must be tested as well: an entry holding 'false'
+        // (for example one default-inserted by a [] lookup) is not a ban.
+        BannedAnalysisMap::const_iterator banIter =
+                                            iv_bannedAnalysis.find( banKey );
+        if ( (iv_bannedAnalysis.end() != banIter) && banIter->second )
+            continue;
+
+        // Get the CEs per dimm select threshold.
+        uint16_t junk0, junk1, dsTh;
+        o_rc = getMnfgMemCeTh( iv_mbaChip, dsIter->first.rank, junk0,
+                               junk1, dsTh );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X",
+                      getHuid( mbaTrgt ) );
+            break;
+        }
+
+        // Now, check if a threshold has been reached. If not, continue to the
+        // next entry in iv_dsMap.
+        if ( dsIter->second <= dsTh )
+            continue;
+
+        // At this point a threshold has been reached. Callout a single symbol
+        // found in this dimm select.
+        for ( CESymbols::iterator symIter = iv_ceSymbols.begin();
+              symIter != iv_ceSymbols.end(); ++symIter )
+        {
+            if ( (dsIter->first.rank.getDimmSlct() ==
+                            symIter->symbol.getRank().getDimmSlct() )
+                 && (dsIter->first.portSlct ==
+                            symIter->symbol.getPortSlct()) )
+            {
+                MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() ,
+                                   symIter->symbol );
+
+                errlHndl_t l_errl = NULL;
+                PRDF_CREATE_ERRL( l_errl,
+                                  ERRL_SEV_PREDICTIVE,
+                                  ERRL_ETYPE_NOT_APPLICABLE,
+                                  SRCI_ERR_INFO,
+                                  SRCI_NO_ATTR,
+                                  PRDF_MNFG_IPL_CE_ANALYSIS,
+                                  LIC_REFCODE,
+                                  PRDF_DETECTED_FAIL_HARDWARE,
+                                  getHuid(mbaTrgt),
+                                  0, PRDFSIG_MnfgIplDsCTE, 0);
+
+                addMruAndCommitErrl( memMru, l_errl);
+
+                // Ban the half dimm select.
+                iv_bannedAnalysis[banKey] = true;
+                o_callOutsMade = true;
+
+                // Only one symbol needs to be called out, so exit on first
+                // occurrence.
+                break;
+            }
+        }
+    }
+    return o_rc;
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// Adds the MemoryMru callout (high priority, delayed deconfig, predictive
+// gard) and PRD traces to the given error log, then commits it. Does nothing
+// other than tracing if the error log handle is NULL.
+void CenMbaIplCeStats::addMruAndCommitErrl( const MemoryMru & i_memmru,
+                                            errlHndl_t i_errl )
+{
+    // The handle is dereferenced below, so guard against a log that was
+    // never created.
+    if ( NULL == i_errl )
+    {
+        PRDF_ERR( "[CenMbaIplCeStats::addMruAndCommitErrl] i_errl is NULL" );
+        return;
+    }
+
+    // Add MemoryMru callouts and FFDC
+    CalloutUtil::calloutMemoryMru( i_errl, i_memmru,
+                                   SRCI_PRIORITY_HIGH,
+                                   HWAS::DELAYED_DECONFIG,
+                                   HWAS::GARD_Predictive );
+
+    // Add traces
+    i_errl->collectTrace( PRDF_COMP_NAME, 512 );
+
+    // Commit the error log
+    ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID );
+}
+
+} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H
new file mode 100755
index 000000000..af358dd6b
--- /dev/null
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H
@@ -0,0 +1,230 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.H $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef PRDF_CEN_MBA_IPL_CE_STATS_H
+#define PRDF_CEN_MBA_IPL_CE_STATS_H
+
+/** @file prdfCenMbaIplCeStats.H */
+
+//------------------------------------------------------------------------------
+#include <prdfExtensibleChip.H>
+#include <prdfCenAddress.H>
+#include <prdfCenSymbol.H>
+#include <map>
+#include <vector>
+
+namespace PRDF
+{
+class ExtensibleChip;
+class MemoryMru;
+//------------------------------------------------------------------------------
+
+/**
+ * This class is used for storing the CE statistics that are gathered during a
+ * manufacturing mode IPL for MDIA analysis. Only one instance of this object is
+ * meant to be used for each MBA and is stored in its data bundle.
+ * It is expected that when the IPL memory diagnostics are complete, MDIA will
+ * call the appropriate function to tell this object to analyze all statistics
+ * that were collected during the IPL.
+ */
+class CenMbaIplCeStats
+{
+ public:
+
+ /**
+ * @brief Constructor
+ * @param i_mbaChip The MBA chip.
+ */
+ explicit CenMbaIplCeStats( ExtensibleChip * i_mbaChip )
+ :iv_mbaChip(i_mbaChip) {}
+
+ /**
+ * @brief Destructor
+ */
+ ~CenMbaIplCeStats() {}
+
+ /**
+ * @brief Bans analysis of the given rank (every port select of the rank).
+ * @param i_rank The rank to ban.
+ */
+ void banAnalysis( const CenRank & i_rank );
+
+ /**
+ * @brief Bans analysis of the given half rank.
+ * @param i_rank The rank.
+ * @param i_portSlct The port select.
+ * @return Non-SUCCESS if the parameters are invalid, SUCCESS otherwise.
+ */
+ int32_t banAnalysis( const CenRank & i_rank, uint8_t i_portSlct );
+
+ /**
+ * @brief Will collect all the maintenance statistics and store them for
+ * analysis. This function does not clear the hardware statistics
+ * counters; the implementation notes that the cleanup is done by
+ * the TD controller code.
+ * @param i_rank The rank the maintenance command stopped on.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t collectStats( const CenRank & i_rank );
+
+ /**
+ * @brief Analyzes the maintenance statistics that were gathered, making
+ * the appropriate callouts.
+ * @param o_callOutsDone TRUE if PRD made a hardware callout, FALSE
+ * otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t analyzeStats ( bool & o_callOutsDone );
+
+ /** @brief Calls out symbols with hard CEs. Once a symbol on a half rank
+ * has been called out, that half rank is banned and no further
+ * symbols on it are called out.
+ * @param i_stopRank The rank the maintenance command stopped on.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t calloutHardCes( const CenRank & i_stopRank );
+
+ private: // enums, structs, typedefs
+
+ /** @brief The element type for each entry in iv_ceSymbols. */
+ struct SymbolKey
+ {
+ CenSymbol symbol; ///< The failing symbol
+
+ /** @brief Overrides the '==' operator. */
+ bool operator==( const SymbolKey & i ) const
+ { return ( symbol == i.symbol ); }
+
+ /** @brief Overrides the '<' operator. */
+ bool operator<( const SymbolKey & i ) const
+ { return (symbol < i.symbol ); }
+ };
+
+
+ /** @brief The key to identify a half rank.
+ * This key is used for banning analysis and for the rank and
+ * dimm select threshold analysis.
+ */
+ struct HalfRankKey
+ {
+ CenRank rank; ///< The rank
+ uint8_t portSlct; ///< The port select
+
+ /** @brief Overrides the '==' operator. */
+ bool operator==( const HalfRankKey & i ) const
+ { return ( (rank == i.rank) && (portSlct == i.portSlct) ); }
+
+ /** @brief Overrides the '<' operator. */
+ bool operator<( const HalfRankKey & i ) const
+ {
+ return ( (rank < i.rank) ||
+ ((rank == i.rank) && (portSlct < i.portSlct)) );
+ }
+ };
+
+ /** @brief The key type for each entry in iv_dramMap. */
+ struct DramKey
+ {
+ CenRank rank; ///< The rank
+ uint8_t dram; ///< The DRAM (x8:0-17 x4:0-35)
+ uint8_t portSlct; ///< The port select (0-1)
+
+ // Technically, the port select can be derived from the DRAM value,
+ // however, it simplifies things to just store the port select here.
+ // Therefore, the port select does not need to be used in operator==()
+ // or operator<().
+
+ /** @brief Overrides the '==' operator. */
+ bool operator==( const DramKey & i ) const
+ { return ( (rank == i.rank) && (dram == i.dram) ); }
+
+ /** @brief Overrides the '<' operator. */
+ bool operator<( const DramKey & i ) const
+ { return ( (rank < i.rank) || ((rank == i.rank) && (dram < i.dram)) ); }
+ };
+
+ // data type to collect all symbol statistics
+ typedef std::vector<SymbolKey> CESymbols;
+
+ // data type to collect dimm specific statistics. While
+ // filling up data for this data type, we should ignore rank select
+ // and only consider dimm select.
+ typedef std::map<HalfRankKey, uint32_t> CePerHalfDsMap;
+ // data type to collect all symbol statistics for a half rank.
+ typedef std::map<HalfRankKey, uint32_t> CePerHalfRankMap;
+ // data type to store banned half ranks on which analysis is not required.
+ typedef std::map<HalfRankKey, bool> BannedAnalysisMap;
+ // data type to collect all symbol statistics for a dram.
+ typedef std::map<DramKey, uint32_t> CePerDramMap;
+
+ private: // functions
+
+ /** @brief Calls out one symbol on each DRAM that has exceeded threshold.
+ * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t calloutCePerDram( bool & o_callOutsMade );
+
+ /** @brief Calls out one symbol on each half rank that has exceeded threshold.
+ * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t calloutCePerRank( bool & o_callOutsMade );
+
+ /** @brief Calls out one symbol on each half dimm select that has exceeded
+ * threshold.
+ * @param o_callOutsMade TRUE if a callout was made, FALSE otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t calloutCePerDs( bool & o_callOutsMade );
+
+ /**
+ * @brief Add MemoryMru callout to error log and commit it.
+ * @param i_memmru Memory MRU.
+ * @param i_errl Error log.
+ */
+ void addMruAndCommitErrl( const MemoryMru & i_memmru, errlHndl_t i_errl );
+
+ private: // instance variables
+
+ /** The MBA chip. */
+ ExtensibleChip * iv_mbaChip;
+
+ /** The rank that this object is expecting to collect for.
+ * NOTE(review): not set by the constructor initializer list and not
+ * referenced in prdfCenMbaIplCeStats.C - confirm it is still needed. */
+ CenRank iv_rankToAnalyze;
+
+ /** A map to keep track of which half ranks have already been called out.
+ * This helps reduce excessive callouts for the same hardware. */
+ BannedAnalysisMap iv_bannedAnalysis;
+
+ /** A vector containing all data for every failing symbol. */
+ CESymbols iv_ceSymbols;
+
+ /** A map containing count for every failing dimm select. */
+ CePerHalfDsMap iv_dsMap;
+
+ /** A map containing count for every failing rank. */
+ CePerHalfRankMap iv_rankMap;
+
+ /** A map containing count for every failing DRAM. */
+ CePerDramMap iv_dramMap;
+};
+
+} //end namespace PRDF
+#endif /* PRDF_CEN_MBA_IPL_CE_STATS_H */
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C
index e329f15df..aada91165 100644
--- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C
@@ -52,24 +52,30 @@ using namespace PlatServices;
enum EccErrorMask
{
- NO_ERROR = 0, ///< No ECC errors found
- UE = 0x80, ///< UE
- MPE = 0x40, ///< Chip mark placed
- RCE = 0x20, ///< Retry CE
- MCE = 0x10, ///< CE on chip mark
+ NO_ERROR = 0, ///< No ECC errors found
+ UE = 0x01, ///< UE
+ MPE = 0x02, ///< Chip mark placed
+ MCE = 0x04, ///< CE on chip mark
+ HARD_CTE = 0x08, ///< Hard CE threshold exceeded
+ SOFT_CTE = 0x10, ///< Soft CE threshold exceeded
+ INTER_CTE = 0x20, ///< Intermittent CE threshold exceeded
+ RETRY_CTE = 0x40, ///< Retry CE threshold exceeded
};
//------------------------------------------------------------------------------
// Class Variables
//------------------------------------------------------------------------------
-CenMbaTdCtlr::CMD_COMPLETE_FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] =
+CenMbaTdCtlr::FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] =
{
&CenMbaTdCtlr::analyzeCmdComplete, // NO_OP
&CenMbaTdCtlr::analyzeVcmPhase1, // VCM_PHASE_1
&CenMbaTdCtlr::analyzeVcmPhase2, // VCM_PHASE_2
&CenMbaTdCtlr::analyzeDsdPhase1, // DSD_PHASE_1
&CenMbaTdCtlr::analyzeDsdPhase2, // DSD_PHASE_2
+ &CenMbaTdCtlr::analyzeTpsPhase1, // TPS_PHASE_1
+ &CenMbaTdCtlr::analyzeTpsPhase2, // TPS_PHASE_2
+ NULL, // RANK_SCRUB
};
//------------------------------------------------------------------------------
@@ -109,6 +115,13 @@ int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc )
break;
}
+ if ( NULL == cv_cmdCompleteFuncs[iv_tdState] )
+ {
+ PRDF_ERR( PRDF_FUNC"Function for state %d not supported",
+ iv_tdState );
+ o_rc = FAIL; break;
+ }
+
o_rc = (this->*cv_cmdCompleteFuncs[iv_tdState])( io_sc );
if ( SUCCESS != o_rc )
{
@@ -206,8 +219,6 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
int32_t o_rc = SUCCESS;
- TargetHandle_t mba = iv_mbaChip->GetChipHandle();
-
do
{
if ( NO_OP != iv_tdState )
@@ -227,7 +238,7 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
iv_rank = CenRank( addr.getRank() );
// Get error condition which caused command to stop
- uint8_t eccErrorMask = NO_ERROR;
+ uint16_t eccErrorMask = NO_ERROR;
o_rc = checkEccErrors( eccErrorMask );
if ( SUCCESS != o_rc )
{
@@ -247,30 +258,28 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
}
else if ( eccErrorMask & MPE )
{
- // Get the current marks in hardware.
- o_rc = mssGetMarkStore( mba, iv_rank, iv_mark );
+ o_rc = handleMPE( io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed");
+ PRDF_ERR( PRDF_FUNC"handleMPE() failed");
break;
}
-
- if ( !iv_mark.getCM().isValid() )
- {
- PRDF_ERR( PRDF_FUNC"No valid chip mark to verify");
- o_rc = FAIL; break;
- }
-
- io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 );
-
- CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc );
-
- // Start VCM procedure
- o_rc = startVcmPhase1();
- if ( SUCCESS != o_rc )
+ }
+ else if ( isMfgCeCheckingEnabled() )
+ {
+ // During MNFG IPL CE, we will get this condition.
+ // During SF read, all CE are reported as Hard CE.
+ // So we will only check for Hard CE threshold.
+ if ( eccErrorMask & HARD_CTE )
{
- PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" );
- break;
+ io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase1 );
+ // Start TPS Phase 1
+ o_rc = startTpsPhase1();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"startTpsPhase1() failed" );
+ break;
+ }
}
}
else
@@ -305,7 +314,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc )
}
// Get error condition which caused command to stop
- uint8_t eccErrorMask = NO_ERROR;
+ uint16_t eccErrorMask = NO_ERROR;
o_rc = checkEccErrors( eccErrorMask );
if ( SUCCESS != o_rc )
{
@@ -313,7 +322,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc )
break;
}
- if ( (eccErrorMask & UE) || (eccErrorMask & RCE) )
+ if ( (eccErrorMask & UE) || (eccErrorMask & RETRY_CTE) )
{
// Handle UE. Highest priority
o_rc = handleUE( io_sc );
@@ -364,7 +373,7 @@ int32_t CenMbaTdCtlr::analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc )
}
// Get error condition which caused command to stop
- uint8_t eccErrorMask = NO_ERROR;
+ uint16_t eccErrorMask = NO_ERROR;
o_rc = checkEccErrors( eccErrorMask );
if ( SUCCESS != o_rc )
{
@@ -448,7 +457,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc )
}
// Get error condition which caused command to stop
- uint8_t eccErrorMask = NO_ERROR;
+ uint16_t eccErrorMask = NO_ERROR;
o_rc = checkEccErrors( eccErrorMask );
if ( SUCCESS != o_rc)
{
@@ -456,7 +465,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc )
break;
}
- if ( ( eccErrorMask & UE) || ( eccErrorMask & RCE ) )
+ if ( ( eccErrorMask & UE) || ( eccErrorMask & RETRY_CTE ) )
{
// Handle UE. Highest priority
o_rc = handleUE( io_sc );
@@ -473,7 +482,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc )
CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc );
// Start DSD Phase 2
- startDsdPhase2();
+ o_rc = startDsdPhase2();
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC"startDsdPhase2() failed" );
@@ -507,7 +516,7 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc )
}
// Get error condition which caused command to stop
- uint8_t eccErrorMask = NO_ERROR;
+ uint16_t eccErrorMask = NO_ERROR;
o_rc = checkEccErrors( eccErrorMask );
if ( SUCCESS != o_rc )
{
@@ -567,6 +576,144 @@ int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
+// Command complete handler for TPS phase 1. Collects the IPL CE statistics for
+// iv_rank, then checks the ECC error conditions: a UE or retry CTE is handled
+// as a UE, an MPE is handed to handleMPE(), otherwise TPS phase 2 is started.
+// Returns non-SUCCESS if the state is wrong or an internal function fails.
+int32_t CenMbaTdCtlr::analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase1] "
+
+    // This handler is only valid while in TPS phase 1.
+    if ( TPS_PHASE_1 != iv_tdState )
+    {
+        PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" );
+        return FAIL;
+    }
+
+    CenMbaDataBundle * bundle = getMbaDataBundle( iv_mbaChip );
+
+    int32_t o_rc = bundle->getIplCeStats()->collectStats( iv_rank );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"collectStats() failed");
+        return o_rc;
+    }
+
+    // Get error condition which caused command to stop
+    uint16_t eccErrorMask = NO_ERROR;
+    o_rc = checkEccErrors( eccErrorMask );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" );
+        return o_rc;
+    }
+
+    if ( 0 != (eccErrorMask & (UE | RETRY_CTE)) )
+    {
+        // Handle UE. Highest priority
+        o_rc = handleUE( io_sc );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"handleUE() failed" );
+        }
+    }
+    else if ( 0 != (eccErrorMask & MPE) )
+    {
+        o_rc = handleMPE( io_sc );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"handleMPE() failed");
+        }
+    }
+    else
+    {
+        // Nothing of higher priority was found, so move on to TPS Phase 2
+        io_sc.service_data->SetErrorSig( PRDFSIG_StartTpsPhase2 );
+        o_rc = startTpsPhase2();
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"startTpsPhase2() failed" );
+        }
+    }
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// Command complete handler for TPS phase 2. Calls out the hard CEs found on
+// iv_rank, then checks the ECC error conditions: a UE or retry CTE is handled
+// as a UE, an MPE is handed to handleMPE(), otherwise the procedure ends and
+// the state machine returns to NO_OP.
+// Returns non-SUCCESS if the state is wrong or an internal function fails.
+int32_t CenMbaTdCtlr::analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[CenMbaTdCtlr::analyzeTpsPhase2] "
+
+    // This handler is only valid while in TPS phase 2.
+    if ( TPS_PHASE_2 != iv_tdState )
+    {
+        PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" );
+        return FAIL;
+    }
+
+    CenMbaDataBundle * bundle = getMbaDataBundle( iv_mbaChip );
+
+    int32_t o_rc = bundle->getIplCeStats()->calloutHardCes( iv_rank );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"calloutHardCes() failed");
+        return o_rc;
+    }
+
+    // Get error condition which caused command to stop
+    uint16_t eccErrorMask = NO_ERROR;
+    o_rc = checkEccErrors( eccErrorMask );
+    if ( SUCCESS != o_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" );
+        return o_rc;
+    }
+
+    if ( 0 != (eccErrorMask & (UE | RETRY_CTE)) )
+    {
+        // Handle UE. Highest priority
+        o_rc = handleUE( io_sc );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"handleUE() failed" );
+        }
+    }
+    else if ( 0 != (eccErrorMask & MPE) )
+    {
+        o_rc = handleMPE( io_sc );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"handleMPE() failed");
+        }
+    }
+    else
+    {
+        // Nothing of higher priority was found, so the procedure is done.
+        io_sc.service_data->SetErrorSig( PRDFSIG_EndTpsPhase2 );
+        iv_tdState = NO_OP;
+    }
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
int32_t CenMbaTdCtlr::startVcmPhase1()
{
#define PRDF_FUNC "[CenMbaTdCtlr::startVcmPhase1] "
@@ -587,8 +734,7 @@ int32_t CenMbaTdCtlr::startVcmPhase1()
}
// Start phase 1.
- uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK |
- mss_MaintCmd::STOP_ON_END_ADDRESS |
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS |
mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION );
iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP,
@@ -635,8 +781,7 @@ int32_t CenMbaTdCtlr::startVcmPhase2()
}
// Start phase 2.
- uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK |
- mss_MaintCmd::STOP_ON_END_ADDRESS |
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS |
mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION );
iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ,
@@ -691,8 +836,7 @@ int32_t CenMbaTdCtlr::startDsdPhase1()
}
// Start phase 1.
- uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK |
- mss_MaintCmd::STOP_ON_END_ADDRESS |
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS |
mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION );
iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP,
@@ -739,8 +883,7 @@ int32_t CenMbaTdCtlr::startDsdPhase2()
}
// Start phase 2.
- uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK |
- mss_MaintCmd::STOP_ON_END_ADDRESS |
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS |
mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION );
iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ,
@@ -767,14 +910,119 @@ int32_t CenMbaTdCtlr::startDsdPhase2()
//------------------------------------------------------------------------------
-bool CenMbaTdCtlr::isInTdMode()
+// Starts TPS phase 1: moves the state machine to TPS_PHASE_1, runs
+// prepareNextCmd() and mnfgCeSetup(), then creates and executes a
+// TIMEBASE_SCRUB maintenance command on iv_rank.
+// Returns non-SUCCESS if any of those steps fails, SUCCESS otherwise.
+int32_t CenMbaTdCtlr::startTpsPhase1()
{
- return ( (NO_OP != iv_tdState) && (MAX_TD_STATE > iv_tdState) );
+ #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase1] "
+
+ int32_t o_rc = SUCCESS;
+
+ // The state is updated up front; mnfgCeSetup() below keys off iv_tdState.
+ iv_tdState = TPS_PHASE_1;
+
+ TargetHandle_t mba = iv_mbaChip->GetChipHandle();
+
+ do
+ {
+ o_rc = prepareNextCmd();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" );
+ break;
+ }
+
+ // We are using current state as input parameter in mnfgCeSetup.
+ // So it is mandatory to set iv_tdState before calling this function.
+ o_rc = mnfgCeSetup();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" );
+ break;
+ }
+
+ // Start phase 1.
+ // Stop at the end address and raise a command complete attention.
+ uint32_t stopCond = ( mss_MaintCmd::STOP_ON_END_ADDRESS |
+ mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION );
+
+ iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB,
+ mba, iv_rank, stopCond );
+ if ( NULL == iv_mssCmd )
+ {
+ PRDF_ERR( PRDF_FUNC"createMssCmd() failed");
+ o_rc = FAIL; break;
+ }
+
+ o_rc = iv_mssCmd->setupAndExecuteCmd();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" );
+ break;
+ }
+
+ } while(0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
}
//------------------------------------------------------------------------------
-int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask )
+/**
+ * @brief  Starts TPS phase 2.
+ * @note   Moves the state machine to TPS_PHASE_2, runs prepareNextCmd() and
+ *         mnfgCeSetup(), then creates and executes a TIMEBASE_SCRUB
+ *         maintenance command on iv_rank.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+int32_t CenMbaTdCtlr::startTpsPhase2()
+{
+    #define PRDF_FUNC "[CenMbaTdCtlr::startTpsPhase2] "
+
+    // The state must be updated first: mnfgCeSetup() keys off iv_tdState.
+    iv_tdState = TPS_PHASE_2;
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    int32_t l_rc = prepareNextCmd();
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" );
+        return l_rc;
+    }
+
+    l_rc = mnfgCeSetup();
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"mnfgCeSetup() failed" );
+        return l_rc;
+    }
+
+    // Phase 2 command: stop at the end address and raise a command complete
+    // attention.
+    uint32_t stopFlags = mss_MaintCmd::STOP_ON_END_ADDRESS;
+    stopFlags |= mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION;
+
+    iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_SCRUB,
+                              mbaTrgt, iv_rank, stopFlags );
+    if ( NULL == iv_mssCmd )
+    {
+        PRDF_ERR( PRDF_FUNC"createMssCmd() failed");
+        return FAIL;
+    }
+
+    l_rc = iv_mssCmd->setupAndExecuteCmd();
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" );
+    }
+
+    return l_rc;
+
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+int32_t CenMbaTdCtlr::checkEccErrors( uint16_t & o_eccErrorMask )
{
#define PRDF_FUNC "[CenMbaTdCtlr::checkEccErrors] "
@@ -821,7 +1069,20 @@ int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask )
if ( mbsEccFir->IsBitSet(38) ) o_eccErrorMask |= MCE;
if ( mbsEccFir->IsBitSet(41) ) o_eccErrorMask |= UE;
- if ( mbsEccFir->IsBitSet(42) ) o_eccErrorMask |= RCE;
+
+ SCAN_COMM_REGISTER_CLASS * mbaSpaFir =
+ iv_mbaChip->getRegister("MBASPA");
+ o_rc = mbaSpaFir->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"Failed to read MBASPA Regsiter");
+ break;
+ }
+
+ if ( mbaSpaFir->IsBitSet(1) ) o_eccErrorMask |= HARD_CTE;
+ if ( mbaSpaFir->IsBitSet(2) ) o_eccErrorMask |= SOFT_CTE;
+ if ( mbaSpaFir->IsBitSet(3) ) o_eccErrorMask |= INTER_CTE;
+ if ( mbaSpaFir->IsBitSet(4) ) o_eccErrorMask |= RETRY_CTE;
} while(0);
@@ -846,6 +1107,7 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
io_sc.service_data->SetServiceCall();
TargetHandle_t mba = iv_mbaChip->GetChipHandle();
+ CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
do
{
@@ -892,6 +1154,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
}
callouts.insert( callouts.end(), dimms.begin(), dimms.end() );
+
+ if ( isMfgCeCheckingEnabled() )
+ {
+ // As we are doing callout for UE, we don't need to do callout
+ // during CE for this rank on the given port
+ mbadb->getIplCeStats()->banAnalysis( iv_rank, ps );
+ }
}
if ( SUCCESS != o_rc ) break;
@@ -909,6 +1178,13 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" );
o_rc = FAIL; break;
}
+
+ if ( isMfgCeCheckingEnabled() )
+ {
+ // As we are doing callout for UE, we don't need to do callout
+ // during CE for this rank on both ports
+ mbadb->getIplCeStats()->banAnalysis( iv_rank);
+ }
}
// Callout all DIMMs in the list.
@@ -927,6 +1203,50 @@ int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
+/**
+ * @brief  Handles an MPE event by starting chip mark verification.
+ * @param  io_sc Service data collector.
+ * @note   Refreshes iv_mark from hardware, requires a valid chip mark, sets
+ *         the PRDFSIG_StartVcmPhase1 signature, calls out the mark, and then
+ *         starts VCM phase 1.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+int32_t CenMbaTdCtlr::handleMPE( STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[CenMbaTdCtlr::handleMPE] "
+
+    TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle();
+
+    // Read the marks currently stored in hardware for this rank.
+    int32_t l_rc = mssGetMarkStore( mbaTrgt, iv_rank, iv_mark );
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed");
+        return l_rc;
+    }
+
+    // Without a valid chip mark there is nothing to verify.
+    const bool chipMarkValid = iv_mark.getCM().isValid();
+    if ( !chipMarkValid )
+    {
+        PRDF_ERR( PRDF_FUNC"No valid chip mark to verify");
+        return FAIL;
+    }
+
+    io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 );
+
+    CalloutUtil::calloutMark( mbaTrgt, iv_rank, iv_mark, io_sc );
+
+    // Kick off the VCM procedure.
+    l_rc = startVcmPhase1();
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" );
+    }
+
+    return l_rc;
+
+    #undef PRDF_FUNC
+}
+//------------------------------------------------------------------------------
+
int32_t CenMbaTdCtlr::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc )
{
#define PRDF_FUNC "[CenMbaTdCtlr::handleMCE_VCM2] "
@@ -1285,6 +1605,20 @@ int32_t CenMbaTdCtlr::prepareNextCmd()
break;
}
+ SCAN_COMM_REGISTER_CLASS * spaAnd =
+ iv_mbaChip->getRegister("MBASPA_AND");
+ spaAnd->setAllBits();
+
+ // clear threshold exceeded attentions
+ spaAnd->SetBitFieldJustified( 1, 4, 0 );
+
+ o_rc = spaAnd->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC"Write() failed on MBASPA_AND" );
+ o_rc = FAIL; break;
+ }
+
} while (0);
return o_rc;
@@ -1338,5 +1672,69 @@ int32_t CenMbaTdCtlr::signalMdiaCmdComplete()
#undef PRDF_FUNC
}
+// Do the setup for mnfg IPL CE.
+//
+// Performs a read-modify-write of the MBSTR register belonging to this MBA
+// (MBA0_MBSTR for position 0, MBA1_MBSTR otherwise) on the connected MEMBUF
+// chip. The bits written depend on the current TD state:
+//   TPS_PHASE_1: set bits 55 and 56, clear bit 57 (count soft CEs)
+//   TPS_PHASE_2: clear bits 55 and 56, set bit 57 (count hard CEs)
+// Any other state is rejected with FAIL and the register is not written, so
+// iv_tdState must be set by the caller before this function is called.
+//
+// Returns non-SUCCESS if an internal function fails, SUCCESS otherwise.
+int32_t CenMbaTdCtlr::mnfgCeSetup()
+{
+    #define PRDF_FUNC "[CenMbaTdCtlr::mnfgCeSetup] "
+
+    int32_t o_rc = SUCCESS;
+
+    do
+    {
+        // The MBSTR registers are accessed through the MEMBUF chip.
+        CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
+        ExtensibleChip * membChip = mbadb->getMembChip();
+        if ( NULL == membChip )
+        {
+            PRDF_ERR( PRDF_FUNC"getMembChip() failed" );
+            o_rc = FAIL; break;
+        }
+
+        uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() );
+
+        // Pick the register instance that matches this MBA's position.
+        const char * reg_str = ( 0 == mbaPos ) ? "MBA0_MBSTR" : "MBA1_MBSTR";
+        SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str );
+        o_rc = mbstr->Read();
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str );
+            break;
+        }
+
+        if ( TPS_PHASE_1 == iv_tdState )
+        {
+            // Enable per-symbol error counters to count soft CEs
+            mbstr->SetBit(55);
+            mbstr->SetBit(56);
+            // Disable per-symbol error counters to count hard CEs
+            mbstr->ClearBit(57);
+        }
+        else if ( TPS_PHASE_2 == iv_tdState )
+        {
+            // Disable per-symbol error counters to count soft CEs
+            mbstr->ClearBit(55);
+            mbstr->ClearBit(56);
+            // Enable per-symbol error counters to count hard CEs
+            mbstr->SetBit(57);
+        }
+        else
+        {
+            // Only the two TPS phases have a counter configuration. Bail out
+            // before the write so the register is left untouched.
+            PRDF_ERR( PRDF_FUNC"Invalid State:%u", iv_tdState );
+            o_rc = FAIL; break;
+        }
+
+        o_rc = mbstr->Write();
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str );
+            break;
+        }
+
+    } while(0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H
index 807211e30..fd4ffd015 100644
--- a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.H
@@ -40,24 +40,8 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
{
private: // constants, enums
- /**
- * @brief Lists all possible states of TD controller
- * @note These enums are used as array indexes to cv_cmdCompleteFuncs and
- * the last entry will be used to get the size of the array.
- */
- enum TdState
- {
- NO_OP = 0, ///< No TD procedures in place.
- VCM_PHASE_1, ///< Verify Chip Mark phase 1.
- VCM_PHASE_2, ///< Verify Chip Mark phase 2.
- DSD_PHASE_1, ///< DRAM Spare Deploy phase 1.
- DSD_PHASE_2, ///< DRAM Spare Deploy phase 2.
- MAX_TD_STATE ///< The maximum number of TD states.
- };
-
// Function pointers for maintenance command complete events.
- typedef int32_t (CenMbaTdCtlr::*CMD_COMPLETE_FUNCS)
- ( STEP_CODE_DATA_STRUCT & io_sc );
+ typedef int32_t (CenMbaTdCtlr::*FUNCS)( STEP_CODE_DATA_STRUCT & io_sc );
public: // functions
@@ -72,7 +56,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
* @param i_mbaChip An MBA chip.
*/
explicit CenMbaTdCtlr( ExtensibleChip * i_mbaChip ) :
- CenMbaTdCtlrCommon(i_mbaChip), iv_tdState(NO_OP)
+ CenMbaTdCtlrCommon(i_mbaChip)
{}
public: // Overloaded functions
@@ -88,13 +72,15 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
int32_t analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc );
int32_t analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc );
int32_t analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc );
+ int32_t analyzeTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc );
+ int32_t analyzeTpsPhase2( STEP_CODE_DATA_STRUCT & io_sc );
int32_t startVcmPhase1();
int32_t startVcmPhase2();
int32_t startDsdPhase1();
int32_t startDsdPhase2();
-
- bool isInTdMode();
+ int32_t startTpsPhase1();
+ int32_t startTpsPhase2();
private: // functions
@@ -104,7 +90,7 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
* occurred.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
- int32_t checkEccErrors( uint8_t & o_eccErrorMask );
+ int32_t checkEccErrors( uint16_t & o_eccErrorMask );
/**
* @brief Handle UEs during TD analysis.
@@ -114,6 +100,14 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
int32_t handleUE( STEP_CODE_DATA_STRUCT & io_sc );
/**
+ * @brief Handle MPE event
+ * @param io_sc Service data collector.
+ * @note This will start VCM phase 1.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t handleMPE( STEP_CODE_DATA_STRUCT & io_sc );
+
+ /**
* @brief Handle MCE event during VCM Phase 2
* @param io_sc Service data collector.
* @note This will update bad bits information in VPD, set callouts, and
@@ -155,16 +149,21 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
*/
int32_t signalMdiaCmdComplete();
- private: // instance variables
+ /**
+ * @brief Does mnfg setup for CE threshold.
+ * @note Before calling this function, set current state to new
+ * value (TPS_PHASE_1/ TPS_PHASE_2).
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ int32_t mnfgCeSetup();
- /** The targeted diagnostics state variable (see enum TdState). */
- TdState iv_tdState;
+ private: // instance variables
/** Array of functions pointers for TD controller states. This is used to
* determine the next course of action after a maintenance command complete
* attention.
*/
- static CMD_COMPLETE_FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE];
+ static FUNCS cv_cmdCompleteFuncs[MAX_TD_STATE];
}; // CenMbaTdCtlr
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H
new file mode 100755
index 000000000..45fdd3282
--- /dev/null
+++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H
@@ -0,0 +1,54 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaThresholds.H $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef __PRDF_CEN_MBA_THRESHOLDS_H
+#define __PRDF_CEN_MBA_THRESHOLDS_H
+
+/** @file prdfCenMbaThresholds.H
+ * @brief Utility functions used to get specific Centaur thresholds
+ * during IPL time.
+ */
+
+#include <prdfCenMbaThresholds_common.H>
+#include <prdfMfgThresholdMgr.H>
+#include <prdfMfgThresholds.H>
+
+namespace PRDF
+{
+
+class ExtensibleChip;
+
+/**
+ * @brief  Looks up the manufacturing memory CE threshold per 2GB ( base ).
+ * @return The value the manufacturing threshold manager reports for
+ *         PRDF_CEN_MBA_IPL_SOFT_CE_TH_ALGO.
+ */
+inline uint8_t getMnfgCeTh()
+{
+    const uint8_t ceTh = MfgThresholdMgr::getInstance()->getThreshold(
+                                        PRDF_CEN_MBA_IPL_SOFT_CE_TH_ALGO );
+    return ceTh;
+}
+
+} // end namespace PRDF
+
+#endif /* __PRDF_CEN_MBA_THRESHOLDS_H */
+
OpenPOWER on IntegriCloud