/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ /* Contributors Listed Below - COPYRIGHT 2013,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ /* implied. See the License for the specific language governing */ /* permissions and limitations under the License. */ /* */ /* IBM_PROLOG_END_TAG */ /** @file prdfMemUtils.C * @brief Utility functions related to memory */ #include // Framework includes #include #include // Platform includes #include #include #include #include #include #if __HOSTBOOT_RUNTIME #include #endif using namespace TARGETING; namespace PRDF { namespace MemUtils { using namespace PlatServices; using namespace PARSERUTILS; using namespace CEN_SYMBOL; const uint8_t CE_REGS_PER_PORT = 9; const uint8_t SYMBOLS_PER_CE_REG = 8; static const char *mbsCeStatReg[][ CE_REGS_PER_PORT ] = { { "MBA0_MBSSYMEC0", "MBA0_MBSSYMEC1","MBA0_MBSSYMEC2", "MBA0_MBSSYMEC3", "MBA0_MBSSYMEC4", "MBA0_MBSSYMEC5", "MBA0_MBSSYMEC6", "MBA0_MBSSYMEC7", "MBA0_MBSSYMEC8" }, { "MBA1_MBSSYMEC0", "MBA1_MBSSYMEC1","MBA1_MBSSYMEC2", "MBA1_MBSSYMEC3", "MBA1_MBSSYMEC4", "MBA1_MBSSYMEC5", "MBA1_MBSSYMEC6", "MBA1_MBSSYMEC7", "MBA1_MBSSYMEC8" } }; static const char *mcbCeStatReg[CE_REGS_PER_PORT] = { "MCB_MBSSYMEC0", "MCB_MBSSYMEC1", "MCB_MBSSYMEC2", "MCB_MBSSYMEC3", "MCB_MBSSYMEC4", "MCB_MBSSYMEC5", "MCB_MBSSYMEC6", "MCB_MBSSYMEC7", "MCB_MBSSYMEC8" }; //------------------------------------------------------------------------------ // Helper structs for collectCeStats() struct DramCount_t { uint8_t totalCount; uint8_t symbolCount; DramCount_t() : totalCount(0), symbolCount(0) {} }; typedef std::map DramCountMap; //------------------------------------------------------------------------------ template<> int32_t collectCeStats( ExtensibleChip * i_chip, const MemRank & i_rank, MaintSymbols & o_maintStats, MemSymbol & o_chipMark, uint8_t i_thr ) { #define PRDF_FUNC "[MemUtils::collectCeStats] " int32_t o_rc = SUCCESS; o_chipMark = MemSymbol(); // Initially invalid. do { PRDF_ASSERT( 0 != i_thr ); TargetHandle_t mcaTrgt = i_chip->getTrgt(); ExtensibleChip * mcbChip = getConnectedParent( i_chip, TYPE_MCBIST ); const bool isX4 = isDramWidthX4(mcaTrgt); // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; const char * reg_str = NULL; SCAN_COMM_REGISTER_CLASS * reg = NULL; for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) { reg_str = mcbCeStatReg[regIdx]; reg = mcbChip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str ); break; } uint8_t baseSymbol = SYMBOLS_PER_CE_REG * regIdx; for ( uint8_t i = 0; i < SYMBOLS_PER_CE_REG; i++ ) { uint8_t count = reg->GetBitFieldJustified( (i*8), 8 ); if ( 0 == count ) continue; // nothing to do uint8_t sym = baseSymbol + i; PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); uint8_t dram = isX4 ? symbol2Nibble( sym ) : symbol2Byte ( sym ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; // Add any symbols that have exceeded threshold to the list. if ( i_thr <= count ) { // Keep track of the total number of symbols per DRAM that // have exceeded threshold. dramCounts[dram].symbolCount++; SymbolData symData; symData.symbol = MemSymbol::fromSymbol( mcaTrgt, i_rank, sym, CEN_SYMBOL::ODD_SYMBOL_DQ ); if ( !symData.symbol.isValid() ) { PRDF_ERR( PRDF_FUNC "MemSymbol() failed: symbol=%d", sym ); o_rc = FAIL; break; } else { // Add the symbol to the list. symData.count = count; o_maintStats.push_back( symData ); } } } if ( SUCCESS != o_rc ) break; } if ( SUCCESS != o_rc ) break; if ( o_maintStats.empty() ) break; // no need to continue // Sort the list of symbols. std::sort( o_maintStats.begin(), o_maintStats.end(), sortSymDataCount ); // Get the DRAM with the highest count. uint32_t highestDram = 0; uint32_t highestCount = 0; const uint32_t symbolTH = isX4 ? 1 : 2; for ( DramCountMap::iterator it = dramCounts.begin(); it != dramCounts.end(); ++it ) { if ( (symbolTH <= it->second.symbolCount) && (highestCount < it->second.totalCount ) ) { highestDram = it->first; highestCount = it->second.totalCount; } } if ( 0 != highestCount ) { uint8_t sym = isX4 ? nibble2Symbol( highestDram ) : byte2Symbol ( highestDram ); PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); o_chipMark = MemSymbol::fromSymbol( mcaTrgt, i_rank, sym ); } } while(0); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Failed: i_chip=0x%08x i_rank=m%ds%d i_thr=%d", i_chip->GetId(), i_rank.getMaster(), i_rank.getSlave(), i_thr ); } return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ template<> int32_t collectCeStats( ExtensibleChip * i_chip, const MemRank & i_rank, MaintSymbols & o_maintStats, MemSymbol & o_chipMark, uint8_t i_thr ) { #define PRDF_FUNC "[MemUtils::collectCeStats] " int32_t o_rc = SUCCESS; o_chipMark = MemSymbol(); // Initially invalid. do { if ( 0 == i_thr ) // Must be non-zero { PRDF_ERR( PRDF_FUNC "i_thr %d is invalid", i_thr ); o_rc = FAIL; break; } TargetHandle_t mbaTrgt = i_chip->getTrgt(); ExtensibleChip * membufChip = getConnectedParent(i_chip, TYPE_MEMBUF); if ( nullptr == membufChip ) { PRDF_ERR( PRDF_FUNC "getMembChip() failed" ); o_rc = FAIL; break; } uint8_t mbaPos = getTargetPosition( mbaTrgt ); if ( MAX_MBA_PER_MEMBUF <= mbaPos ) { PRDF_ERR( PRDF_FUNC "mbaPos %d is invalid", mbaPos ); o_rc = FAIL; break; } const bool isX4 = isDramWidthX4(mbaTrgt); // Get the current spares on this rank. MemSymbol sp0, sp1, ecc; /* TODO RTC 157888/189221 - uncomment when mssGetSteerMux is working o_rc = mssGetSteerMux( mbaTrgt, i_rank, sp0, sp1, ecc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed." ); break; } */ // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; const char * reg_str = NULL; SCAN_COMM_REGISTER_CLASS * reg = NULL; for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) { reg_str = mbsCeStatReg[mbaPos][regIdx]; reg = membufChip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str ); break; } uint8_t baseSymbol = SYMBOLS_PER_CE_REG * regIdx; for ( uint8_t i = 0; i < SYMBOLS_PER_CE_REG; i++ ) { uint8_t count = reg->GetBitFieldJustified( (i*8), 8 ); if ( 0 == count ) continue; // nothing to do uint8_t sym = baseSymbol + i; uint8_t dram = isX4 ? symbol2Nibble( sym ) : symbol2Byte ( sym ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; // Add any symbols that have exceeded threshold to the list. if ( i_thr <= count ) { // Keep track of the total number of symbols per DRAM that // have exceeded threshold. dramCounts[dram].symbolCount++; SymbolData symData; symData.symbol = MemSymbol::fromSymbol( mbaTrgt, i_rank, sym, CEN_SYMBOL::BOTH_SYMBOL_DQS ); if ( !symData.symbol.isValid() ) { PRDF_ERR( PRDF_FUNC "MemSymbol() failed: symbol=%d", sym ); o_rc = FAIL; break; } else { /* TODO RTC 157888/189221 - sp0 and sp1 aren't defined yet // Check if this symbol is on any of the spares. if ( ( sp0.isValid() && (sp0.getDram() == symData.symbol.getDram()) ) || ( sp1.isValid() && (sp1.getDram() == symData.symbol.getDram()) ) ) { symData.symbol.setDramSpared(); } */ if ( ecc.isValid() && (ecc.getDram() == symData.symbol.getDram()) ) { symData.symbol.setEccSpared(); } // Add the symbol to the list. symData.count = count; o_maintStats.push_back( symData ); } } } if ( SUCCESS != o_rc ) break; } if ( SUCCESS != o_rc ) break; if ( o_maintStats.empty() ) break; // no need to continue // Sort the list of symbols. std::sort( o_maintStats.begin(), o_maintStats.end(), sortSymDataCount ); // Get the DRAM with the highest count. uint32_t highestDram = 0; uint32_t highestCount = 0; const uint32_t symbolTH = isX4 ? 1 : 2; for ( DramCountMap::iterator it = dramCounts.begin(); it != dramCounts.end(); ++it ) { if ( (symbolTH <= it->second.symbolCount) && (highestCount < it->second.totalCount ) ) { highestDram = it->first; highestCount = it->second.totalCount; } } if ( 0 != highestCount ) { uint8_t sym = isX4 ? nibble2Symbol( highestDram ) : byte2Symbol ( highestDram ); PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); o_chipMark = MemSymbol::fromSymbol( mbaTrgt, i_rank, sym ); /* TODO RTC 157888/18922uncomment when mssGetSteerMux is working1 - sp0 and sp1 aren't defined yet // Check if this symbol is on any of the spares. if ( ( sp0.isValid() && (sp0.getDram() == o_chipMark.getDram()) ) || ( sp1.isValid() && (sp1.getDram() == o_chipMark.getDram()) ) ) { o_chipMark.setDramSpared(); } */ if ( ecc.isValid() && (ecc.getDram() == o_chipMark.getDram()) ) { o_chipMark.setEccSpared(); } } } while(0); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Failed: i_mbaChip=0x%08x i_rank=m%ds%d i_thr=%d", i_chip->GetId(), i_rank.getMaster(), i_rank.getSlave(), i_thr ); } return o_rc; #undef PRDF_FUNC } //------------------------------------------------------------------------------ template<> uint8_t getDramSize(ExtensibleChip *i_chip, uint8_t i_dimmSlct) { #define PRDF_FUNC "[MemUtils::getDramSize] " PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); TargetHandle_t mcaTrgt = i_chip->getTrgt(); TargetHandle_t mcsTrgt = getConnectedParent( mcaTrgt, TYPE_MCS ); PRDF_ASSERT( nullptr != mcsTrgt ); uint8_t mcaRelPos = i_chip->getPos() % MAX_MCA_PER_MCS; uint8_t tmp[MAX_MCA_PER_MCS][DIMM_SLCT_PER_PORT]; if ( !mcsTrgt->tryGetAttr(tmp) ) { PRDF_ERR( PRDF_FUNC "Failed to get ATTR_EFF_DRAM_DENSITY" ); PRDF_ASSERT( false ); } return tmp[mcaRelPos][i_dimmSlct]; #undef PRDF_FUNC } template<> uint8_t getDramSize(ExtensibleChip *i_chip, uint8_t i_dimmSlct) { #define PRDF_FUNC "[MemUtils::getDramSize] " PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); uint8_t o_size = 0; do { ExtensibleChip * membufChip = getConnectedParent(i_chip, TYPE_MEMBUF); uint32_t pos = i_chip->getPos(); const char * reg_str = (0 == pos) ? "MBA0_MBAXCR" : "MBA1_MBAXCR"; SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str ); uint32_t rc = reg->Read(); if ( SUCCESS != rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x", reg_str, i_chip->getHuid() ); break; } // The value of MBAXCR[6:7] is 0 = 2Gb, 1 = 4Gb, 2 = 8Gb, and 3 = 16 Gb. // Therefore, to get the DRAM size do the following: // DRAM size = 2 ^ (MBAXCR[6:7] + 1) o_size = 1 << (reg->GetBitFieldJustified(6,2) + 1); } while(0); return o_size; #undef PRDF_FUNC } //------------------------------------------------------------------------------ template<> void cleanupChnlAttns( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemUtils::cleanupChnlAttns] " PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MEMBUF == i_chip->getType() ); // No cleanup if this is a checkstop attention. if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) return; #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP ExtensibleChip * dmiChip = getConnectedParent( i_chip, TYPE_DMI ); // Clear the associated FIR bits for all attention types. // NOTE: If there are any active attentions left in the Centaur the // associated FIR bits in the CHIFIR will be redriven with the // next packet on the bus. SCAN_COMM_REGISTER_CLASS * reg = dmiChip->getRegister("CHIFIR_AND"); reg->setAllBits(); reg->ClearBit(16); // CS reg->ClearBit(19); // RE reg->ClearBit(20); // SPA reg->ClearBit(21); // maintenance command complete reg->Write(); #endif // Hostboot only #undef PRDF_FUNC } //------------------------------------------------------------------------------ /* TODO RTC 136123 int32_t checkMcsChannelFail( ExtensibleChip * i_mcsChip, STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemUtils::checkMcsChannelFail] " int32_t o_rc = SUCCESS; do { // Skip if already handling unit checkstop. if ( io_sc.service_data->IsUnitCS() ) break; // Must be an MCS. if ( TYPE_MCS != getTargetType(i_mcsChip->GetChipHandle()) ) { PRDF_ERR( PRDF_FUNC "i_mcsChip is not TYPE_MCS" ); o_rc = FAIL; break; } // Check MCIFIR[31] for presence of channel fail. SCAN_COMM_REGISTER_CLASS * mcifir = i_mcsChip->getRegister("MCIFIR"); o_rc = mcifir->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on MCIFIR" ); break; } if ( !mcifir->IsBitSet(31) ) break; // No channel fail, so exit. // Set unit checkstop flag and cause attention type. io_sc.service_data->setFlag(ServiceDataCollector::UNIT_CS); io_sc.service_data->setSecondaryAttnType(UNIT_CS); io_sc.service_data->SetThresholdMaskId(0); // Set it as SUE generation point. io_sc.service_data->SetUERE(); // Indicate that cleanup is required. P8McsDataBundle * mcsdb = getMcsDataBundle( i_mcsChip ); ExtensibleChip * membChip = mcsdb->getMembChip(); if ( NULL == membChip ) { PRDF_ERR( PRDF_FUNC "getMembChip() returned NULL" ); o_rc = FAIL; break; } MembufDataBundle * mbdb = getMembufDataBundle( membChip ); mbdb->iv_doChnlFailCleanup = true; } while (0); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Failed: i_mcsChip=0x%08x", i_mcsChip->GetId() ); } return o_rc; #undef PRDF_FUNC } */ //------------------------------------------------------------------------------ template void __cleanupChnlFail( ExtensibleChip * i_chip1, ExtensibleChip * i_chip2, STEP_CODE_DATA_STRUCT & io_sc ); template<> void __cleanupChnlFail( ExtensibleChip * i_dmiChip, ExtensibleChip * i_membChip, STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemUtils::__cleanupChnlFail] " PRDF_ASSERT( nullptr != i_dmiChip ); PRDF_ASSERT( TYPE_DMI == i_dmiChip->getType() ); PRDF_ASSERT( nullptr != i_membChip ); PRDF_ASSERT( TYPE_MEMBUF == i_membChip->getType() ); // No cleanup if this is a checkstop attention. if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) return; // Check if cleanup is still required or has already been done. if ( ! getMembufDataBundle(i_membChip)->iv_doChnlFailCleanup ) return; // Cleanup is complete and no longer required on this channel. getMembufDataBundle(i_membChip)->iv_doChnlFailCleanup = false; #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP // Note that this is a clean up function. If there are any SCOM errors // we will just move on and try the rest. SCAN_COMM_REGISTER_CLASS * reg = nullptr; ExtensibleChip * mcChip = getConnectedParent( i_dmiChip, TYPE_MC ); uint32_t dmiPos = i_dmiChip->getPos() % MAX_DMI_PER_MC; // Mask off all attentions from the DMI target in the chiplet FIRs. reg = mcChip->getRegister( "MC_CHIPLET_FIR_MASK" ); if ( SUCCESS == reg->Read() ) { reg->SetBit( 4 + (dmiPos * 2) ); // 4, 6, 8, 10 reg->Write(); } reg = mcChip->getRegister( "MC_CHIPLET_UCS_FIR_MASK" ); if ( SUCCESS == reg->Read() ) { reg->SetBit( 1 + (dmiPos * 2) ); // 1, 3, 5, 7 reg->Write(); } reg = mcChip->getRegister( "MC_CHIPLET_HA_FIR_MASK" ); if ( SUCCESS == reg->Read() ) { reg->SetBit( 1 + (dmiPos * 2) ); // 1, 3, 5, 7 reg->Write(); } // Mask off all attentions from the DMI target in the IOMCFIR. reg = mcChip->getRegister( "IOMCFIR_MASK_OR" ); reg->SetBitFieldJustified( 8 + (dmiPos * 8), 8, 0xff ); // 8, 16, 24, 32 reg->Write(); // Mask off all attentions from the MEMBUF target in the chiplet FIRs. const char * reg_strs[] { "TP_CHIPLET_FIR_MASK", "NEST_CHIPLET_FIR_MASK", "MEM_CHIPLET_FIR_MASK", "MEM_CHIPLET_SPA_FIR_MASK" }; for ( auto & reg_str : reg_strs ) { reg = i_membChip->getRegister( reg_str ); reg->setAllBits(); // Blindly mask everything reg->Write(); } // For all attached MBAs: // During runtime, send a dynamic memory deallocation message. // During Memory Diagnostics, tell MDIA to stop pattern tests. for ( auto & mbaChip : getConnected(i_membChip, TYPE_MBA) ) { #ifdef __HOSTBOOT_RUNTIME MemDealloc::port( mbaChip ); #else if ( isInMdiaMode() ) mdiaSendEventMsg( mbaChip->getTrgt(), MDIA::STOP_TESTING ); #endif } #endif // Hostboot only #undef PRDF_FUNC } template<> void cleanupChnlFail( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MEMBUF == i_chip->getType() ); ExtensibleChip * dmiChip = getConnectedParent( i_chip, TYPE_DMI ); __cleanupChnlFail( dmiChip, i_chip, io_sc ); } template<> void cleanupChnlFail( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_DMI == i_chip->getType() ); ExtensibleChip * membChip = getConnectedChild( i_chip, TYPE_MEMBUF, 0 ); PRDF_ASSERT( nullptr != membChip ); // shouldn't be possible __cleanupChnlFail( i_chip, membChip, io_sc ); } template<> void cleanupChnlFail( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { PRDF_ASSERT( nullptr != i_chip ); PRDF_ASSERT( TYPE_MC == i_chip->getType() ); for ( auto & dmiChip : getConnected(i_chip, TYPE_DMI) ) { cleanupChnlFail( dmiChip, io_sc ); } } //------------------------------------------------------------------------------ } // end namespace MemUtils } // end namespace PRDF