/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaIplCeStats.C $ */ /* */ /* IBM CONFIDENTIAL */ /* */ /* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ /* Object Code Only (OCO) source materials */ /* Licensed Internal Code Source Materials */ /* IBM HostBoot Licensed Internal Code */ /* */ /* The source code for this program is not published or otherwise */ /* divested of its trade secrets, irrespective of what has been */ /* deposited with the U.S. Copyright Office. */ /* */ /* Origin: 30 */ /* */ /* IBM_PROLOG_END_TAG */ /** @file prdfCenMbaIplCeStats.C * @brief Contains IPL CE related code. */ // Framework includes #include #include #include #include #include #include #include // Pegasus includes #include #include #include #include #include #include using namespace TARGETING; namespace PRDF { using namespace PlatServices; using namespace HWAS; //------------------------------------------------------------------------------ void CenMbaIplCeStats::banAnalysis( const CenRank & i_rank ) { for ( uint8_t i = 0; i < MAX_PORT_PER_MBA; i++ ) { HalfRankKey banKey = { i_rank, i }; iv_bannedAnalysis[banKey] = true; } } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::banAnalysis( const CenRank & i_rank, uint8_t i_portSlct ) { int32_t o_rc = SUCCESS; do { if ( i_portSlct >= MAX_PORT_PER_MBA ) { PRDF_ERR("[banAnalysis] i_portSlct (0x%02x) is invalid", i_portSlct ); o_rc = FAIL; break; } HalfRankKey banKey = { i_rank, i_portSlct }; iv_bannedAnalysis[banKey] = true; } while (0); return o_rc; } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::collectStats( const CenRank & i_stopRank ) { #define PRDF_FUNC "[CenMbaIplCeStats::collectStats] " int32_t o_rc = SUCCESS; do { MemUtils::MaintSymbols symData; CenSymbol junk; o_rc = MemUtils::collectCeStats( iv_mbaChip, i_stopRank, symData, junk); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", getHuid( iv_mbaChip->GetChipHandle() ) ); break; } // if size of stats collected is zero, it may mean some symbol // has gone beyond maximum value. But this is only valid for DD1 // and has a very low probability. So ignoring this case. for ( uint32_t i = 0; i < symData.size(); i++ ) { uint8_t dimmSlct = i_stopRank.getDimmSlct(); uint8_t dram = symData[i].symbol.getDram(); uint8_t portSlct = symData[i].symbol.getPortSlct(); // Check if analysis is banned. HalfRankKey banKey = { i_stopRank, portSlct }; if ( iv_bannedAnalysis[banKey] ) continue; // Update iv_ceSymbols with the new symbol data. SymbolKey symkey = { symData[i].symbol }; iv_ceSymbols.push_back (symkey ); // Increment the soft CEs per DRAM. DramKey dramKey = { i_stopRank, dram, portSlct }; iv_dramMap[dramKey]++; // Increment the soft CEs per half rank. HalfRankKey rankKey = { i_stopRank, portSlct }; iv_rankMap[rankKey]++; // In case of dimm select, rank select does not matter CenRank dimmRank( dimmSlct << DIMM_SLCT_PER_MBA ); // Increment the soft CEs per half dimm select. HalfRankKey dsKey = { dimmRank, portSlct }; iv_dsMap[dsKey]++; } } while (0); // We have to clear all stats before giving control back to MDIA.. // This is done by setting up MBSTRQ[53] bit // We are doing cleanup in TdController code, // So not clearing up stats here. return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::analyzeStats( bool & o_callOutsMade ) { #define PRDF_FUNC "CenMbaIplCeStats::analyzeStats " int32_t o_rc = SUCCESS; o_callOutsMade = false; do { TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); o_rc = calloutCePerDram( o_callOutsMade ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC" calloutCePerDram() failed. MBA:0X%08X", getHuid( mbaTrgt ) ); break; } o_rc = calloutCePerRank( o_callOutsMade ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"calloutCePerRank() failed. MBA:0X%08X", getHuid( mbaTrgt ) ); break; } o_rc = calloutCePerDs( o_callOutsMade ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC" calloutCePerDs() failed. MBA:0X%08X", getHuid( mbaTrgt ) ); break; } } while (0); return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::calloutHardCes( const CenRank & i_stopRank ) { #define PRDF_FUNC "[CenMbaIplCeStats::calloutHardCes] " TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); int32_t o_rc = SUCCESS; do { MemUtils::MaintSymbols symData; CenSymbol junk; o_rc = MemUtils::collectCeStats( iv_mbaChip, i_stopRank, symData, junk); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"MemUtils::collectCeStats() failed. MBA:0X%08X", getHuid( iv_mbaChip->GetChipHandle() ) ); break; } for ( uint32_t i = 0; i < symData.size(); i++ ) { uint8_t portSlct = symData[i].symbol.getPortSlct(); // Check if analysis is banned. HalfRankKey banKey = { i_stopRank, portSlct }; bool& isBanned = iv_bannedAnalysis[banKey]; if ( isBanned ) continue; // At this point a hard CE was found, callout the symbol. MemoryMru memMru ( mbaTrgt, symData[i].symbol.getRank(), symData[i].symbol ); // We are creating and committing error log here. It is different // from rest of attention flow. We could have set the callout // values in sdc but it would have created confusion in ffdc if // we also get vcm/ue at same time. errlHndl_t l_errl = NULL; PRDF_CREATE_ERRL( l_errl, ERRL_SEV_PREDICTIVE, ERRL_ETYPE_NOT_APPLICABLE, SRCI_ERR_INFO, SRCI_NO_ATTR, PRDF_MNFG_IPL_CE_ANALYSIS, LIC_REFCODE, PRDF_DETECTED_FAIL_HARDWARE, getHuid( mbaTrgt ), 0, PRDFSIG_MnfgIplHardCE, 0); addMruAndCommitErrl( memMru, l_errl); // Ban the half rank. isBanned = true; } }while(0); return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::calloutCePerDram( bool & o_callOutsMade ) { #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDram] " int32_t o_rc = SUCCESS; TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); for ( CePerDramMap::iterator dramIter = iv_dramMap.begin(); dramIter != iv_dramMap.end(); dramIter++ ) { // First, check if this half rank is banned from analysis. HalfRankKey banKey = { dramIter->first.rank, dramIter->first.portSlct }; // Check if the rank has already been banned. Note that [] will create // the an entry if one does not exist, so used find() instead to check // for existence in the map. if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) continue; // Get the CEs per DRAM threshold. uint16_t dramTh, junk0, junk1; o_rc = getMnfgMemCeTh( iv_mbaChip, dramIter->first.rank, dramTh, junk0, junk1 ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", getHuid( mbaTrgt ) ); break; } // Now, check if a threshold has been reached. If not, continue to the // next entry in iv_dsMap. if ( dramIter->second <= dramTh ) continue; // At this point a threshold has been reached. Callout a single symbol // found in this dram. for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); symIter != iv_ceSymbols.end(); symIter++ ) { if ( (dramIter->first.rank == symIter->symbol.getRank() ) && (dramIter->first.dram == symIter->symbol.getDram() ) ) { MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , symIter->symbol ); errlHndl_t l_errl = NULL; PRDF_CREATE_ERRL( l_errl, ERRL_SEV_PREDICTIVE, ERRL_ETYPE_NOT_APPLICABLE, SRCI_ERR_INFO, SRCI_NO_ATTR, PRDF_MNFG_IPL_CE_ANALYSIS, LIC_REFCODE, PRDF_DETECTED_FAIL_HARDWARE, getHuid( mbaTrgt ), 0, PRDFSIG_MnfgIplDramCTE, 0); addMruAndCommitErrl( memMru, l_errl); // Ban the half rank. iv_bannedAnalysis[banKey] = true; o_callOutsMade = true; // Only one symbol needs to be called out, so exit on first // occurance. break; } } } return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::calloutCePerRank( bool & o_callOutsMade ) { #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerRank] " int32_t o_rc = SUCCESS; TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); for ( CePerHalfRankMap::iterator rankIter = iv_rankMap.begin(); rankIter != iv_rankMap.end(); rankIter++ ) { // First, check if this half rank is banned from analysis. HalfRankKey banKey = { rankIter->first.rank, rankIter->first.portSlct }; // Check if the rank has already been banned. Note that [] will create // the an entry if one does not exist, so used find() instead to check // for existence in the map. if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) continue; // Get the CEs per rank threshold. uint16_t junk0, rankTh, junk1; o_rc = getMnfgMemCeTh( iv_mbaChip, rankIter->first.rank, junk0, rankTh, junk1 ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", getHuid( mbaTrgt ) ); break; } // Now, check if a threshold has been reached. If not, continue to the // next entry in iv_rankMap. if ( rankIter->second <= rankTh ) continue; // At this point a threshold has been reached. Callout a single symbol // found in this rank. for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); symIter != iv_ceSymbols.end(); symIter++ ) { if ( (rankIter->first.rank == symIter->symbol.getRank() ) && (rankIter->first.portSlct == symIter->symbol.getPortSlct()) ) { MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , symIter->symbol ); errlHndl_t l_errl = NULL; PRDF_CREATE_ERRL( l_errl, ERRL_SEV_PREDICTIVE, ERRL_ETYPE_NOT_APPLICABLE, SRCI_ERR_INFO, SRCI_NO_ATTR, PRDF_MNFG_IPL_CE_ANALYSIS, LIC_REFCODE, PRDF_DETECTED_FAIL_HARDWARE, getHuid( mbaTrgt ), 0, PRDFSIG_MnfgIplRankCTE, 0); addMruAndCommitErrl( memMru, l_errl); // Ban the half rank. iv_bannedAnalysis[banKey] = true; o_callOutsMade = true; // Only one symbol needs to be called out, so exit on first // occurance. break; } } } return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ int32_t CenMbaIplCeStats::calloutCePerDs( bool & o_callOutsMade ) { #define PRDF_FUNC "[CenMbaIplCeStats::calloutCePerDs] " int32_t o_rc = SUCCESS; TargetHandle_t mbaTrgt = iv_mbaChip->GetChipHandle(); for ( CePerHalfDsMap::iterator dsIter = iv_dsMap.begin(); dsIter != iv_dsMap.end(); dsIter++ ) { // First, check if this half dimm select is banned from analysis. HalfRankKey banKey = { dsIter->first.rank, dsIter->first.portSlct }; // Check if the rank has already been banned. Note that [] will create // the an entry if one does not exist, so used find() instead to check // for existence in the map. if ( iv_bannedAnalysis.end() != iv_bannedAnalysis.find(banKey) ) continue; // Get the CEs per dimm select threshold. uint16_t junk0, junk1, dsTh; o_rc = getMnfgMemCeTh( iv_mbaChip, dsIter->first.rank, junk0, junk1, dsTh ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed. MBA:0x%08X", getHuid( mbaTrgt ) ); break; } // Now, check if a threshold has been reached. If not, continue to the // next entry in iv_dsMap. if ( dsIter->second <= dsTh ) continue; // At this point a threshold has been reached. Callout a single symbol // found in this dimm select. for ( CESymbols::iterator symIter = iv_ceSymbols.begin(); symIter != iv_ceSymbols.end(); symIter++ ) { if ( (dsIter->first.rank.getDimmSlct() == symIter->symbol.getRank().getDimmSlct() ) && (dsIter->first.portSlct == symIter->symbol.getPortSlct()) ) { MemoryMru memMru ( mbaTrgt, symIter->symbol.getRank() , symIter->symbol ); errlHndl_t l_errl = NULL; PRDF_CREATE_ERRL( l_errl, ERRL_SEV_PREDICTIVE, ERRL_ETYPE_NOT_APPLICABLE, SRCI_ERR_INFO, SRCI_NO_ATTR, PRDF_MNFG_IPL_CE_ANALYSIS, LIC_REFCODE, PRDF_DETECTED_FAIL_HARDWARE, getHuid(mbaTrgt), 0, PRDFSIG_MnfgIplDsCTE, 0); addMruAndCommitErrl( memMru, l_errl); // Ban the half dimm select. iv_bannedAnalysis[banKey] = true; o_callOutsMade = true; // Only one symbol needs to be called out, so exit on first // occurance. break; } } } return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ void CenMbaIplCeStats::addMruAndCommitErrl( const MemoryMru & i_memmru, errlHndl_t i_errl ) { // Add MemoryMru callouts and FFDC CalloutUtil::calloutMemoryMru( i_errl, i_memmru, SRCI_PRIORITY_HIGH, HWAS::DELAYED_DECONFIG, HWAS::GARD_Predictive ); // Add traces i_errl->collectTrace( PRDF_COMP_NAME, 512 ); // Commit the error log ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID ); } } // end namespace PRDF