diff options
Diffstat (limited to 'src/usr/diag/prdf/common/plat/mem')
21 files changed, 2036 insertions, 263 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C index 1227afeb8..654b39ba0 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -46,8 +46,8 @@ using namespace PlatServices; // Class MemAddr //------------------------------------------------------------------------------ -template<> -MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromReadAddr( uint64_t i_addr ) { uint64_t mrnk = (i_addr >> 59) & 0x7; // 2: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -58,6 +58,12 @@ MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromReadAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) { @@ -73,8 +79,8 @@ MemAddr MemAddr::fromReadAddr<TYPE_MEMBUF>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } -template<> -MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) +template<TARGETING::TYPE T> +MemAddr MemAddr::fromMaintAddr( uint64_t i_addr ) { uint64_t rslct = (i_addr >> 59) & 0x3; // 3: 4 uint64_t srnk = (i_addr >> 56) & 0x7; // 5: 7 @@ -88,6 +94,12 @@ MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ) return MemAddr( MemRank(mrnk, srnk), bnk, row, col ); } +template +MemAddr MemAddr::fromMaintAddr<TYPE_MCBIST>( uint64_t i_addr ); +template +MemAddr MemAddr::fromMaintAddr<TYPE_OCMB_CHIP>( uint64_t i_addr ); + + template<> MemAddr MemAddr::fromMaintAddr<TYPE_MBA>( 
uint64_t i_addr ) { @@ -169,6 +181,53 @@ uint32_t getMemReadAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, uint32_t i_pos, //------------------------------------------------------------------------------ template<> +uint32_t getMemReadAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr::ReadReg i_reg, + MemAddr & o_addr ) +{ + #define PRDF_FUNC "[getMemReadAddr<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + // Check parameters + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + // Get the register string. + const char * reg_str = ""; + switch ( i_reg ) + { + case MemAddr::READ_NCE_ADDR: reg_str = "MBNCER"; break; + case MemAddr::READ_RCE_ADDR: reg_str = "MBRCER"; break; + case MemAddr::READ_MPE_ADDR: reg_str = "MBMPER"; break; + case MemAddr::READ_UE_ADDR : reg_str = "MBUER" ; break; + case MemAddr::READ_AUE_ADDR: reg_str = "MBAUER"; break; + default: PRDF_ASSERT( false ); + } + + // Read the address register + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( reg_str ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on %s: i_chip=0x%08x", + reg_str, i_chip->getHuid() ); + } + else + { + // Get the address object. 
+ uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); + o_addr = MemAddr::fromReadAddr<TYPE_OCMB_CHIP>( addr ); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> uint32_t getMemReadAddr<TYPE_MEMBUF>( ExtensibleChip * i_chip, uint32_t i_pos, MemAddr::ReadReg i_reg, MemAddr & o_addr ) { @@ -247,15 +306,14 @@ uint32_t getMemReadAddr<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ -template<> -uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, - MemAddr & o_addr ) +template<TARGETING::TYPE T> +uint32_t getMemMaintAddr( ExtensibleChip * i_chip, MemAddr & o_addr ) { - #define PRDF_FUNC "[getMemMaintAddr<TYPE_MCBIST>] " + #define PRDF_FUNC "[getMemMaintAddr<T>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCBIST == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); // Read the address register SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MCBMCAT" ); @@ -269,7 +327,7 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, { // Get the address object. 
uint64_t addr = reg->GetBitFieldJustified( 0, 64 ); - o_addr = MemAddr::fromMaintAddr<TYPE_MCBIST>( addr ); + o_addr = MemAddr::fromMaintAddr<T>( addr ); } return o_rc; @@ -277,6 +335,13 @@ uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, + MemAddr & o_addr ); +template +uint32_t getMemMaintAddr<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + MemAddr & o_addr ); + //------------------------------------------------------------------------------ template<> @@ -389,8 +454,9 @@ uint32_t getMemMaintEndAddr<TYPE_MBA>( ExtensibleChip * i_chip, #ifdef __HOSTBOOT_MODULE -uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ) +template<> +uint32_t getMcbistMaintPort<TYPE_MCBIST>( ExtensibleChip * i_mcbChip, + ExtensibleChipList & o_mcaList ) { #define PRDF_FUNC "[getMcbistMaintPort] " @@ -402,9 +468,9 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, o_mcaList.clear(); - SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); - SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); - SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); + SCAN_COMM_REGISTER_CLASS * mcbagra = i_mcbChip->getRegister( "MCBAGRA" ); + SCAN_COMM_REGISTER_CLASS * mcbmcat = i_mcbChip->getRegister( "MCBMCAT" ); + SCAN_COMM_REGISTER_CLASS * mcb_cntl = i_mcbChip->getRegister( "MCB_CNTL" ); do { @@ -446,7 +512,7 @@ uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, } // Get MCAs from all targeted ports. 
- for ( uint8_t p = 0; p < 4; p++ ) + for ( uint8_t p = 0; p < MAX_MCA_PER_MCBIST; p++ ) { if ( 0 == (portMask & (0x8 >> p)) ) continue; diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H index 8dc192672..f5120b3b5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -167,7 +167,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, uint32_t i_pos, /** * @brief Reads the specified mainline memory read address from hardware. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_reg The target address register. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. @@ -189,7 +189,7 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg, * mode or not. Therefore, users must call getMcbistMaintPort() to get the port * information. * - * @param i_chip An MBA or MCBIST chip. + * @param i_chip An MBA, MCBIST, or OCMB chip. * @param o_addr The returned address from hardware. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -228,11 +228,12 @@ uint32_t getMemMaintEndAddr( ExtensibleChip * i_chip, MemAddr & o_addr ); * * @note Only supported for MCBIST. * @param i_mcbChip An MCBIST chip. - * @param o_mcaList A list of all MCAs targeted by the command. + * @param o_portList A list of all MCAs targeted by the command. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ +template<TARGETING::TYPE T> uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, - std::vector<ExtensibleChip *> & o_mcaList ); + ExtensibleChipList & o_portList ); #endif diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C index ebef7ae29..4d55c7c50 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCaptureData.C @@ -39,6 +39,7 @@ #include <prdfCenMbaDataBundle.H> #include <prdfPlatServices.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemRowRepair.H> @@ -65,8 +66,16 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { TargetHandle_t trgt = i_memMru.getTrgt(); - // Get the DRAM width. - extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) + { + TargetHandle_t dimm = getConnectedDimm( trgt, i_memMru.getRank() ); + extMemMru.isX4Dram = isDramWidthX4( dimm ) ? 1 : 0; + } + else + { + // Get the DRAM width. + extMemMru.isX4Dram = isDramWidthX4( trgt ) ? 1 : 0; + } // Get the DIMM type. 
if ( TYPE_MBA == getTargetType(trgt) ) @@ -97,9 +106,9 @@ void addExtMemMruData( const MemoryMru & i_memMru, errlHndl_t io_errl ) { getDimmDqAttr<TYPE_DIMM>(partList[0], extMemMru.dqMapping); } - else if ( TYPE_MEM_PORT == getTargetType(trgt) ) + else if ( TYPE_OCMB_CHIP == getTargetType(trgt) ) { - getDimmDqAttr<TYPE_MEM_PORT>( trgt, extMemMru.dqMapping ); + getDimmDqAttr<TYPE_OCMB_CHIP>( trgt, extMemMru.dqMapping ); } else { @@ -172,7 +181,6 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if( CEN_VPD_DIMM_SPARE_NO_SPARE != spareConfig ) data.header.isSpareDram = true; - // Iterate all ranks to get DRAM repair data for ( auto & rank : masterRanks ) { @@ -220,8 +228,11 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_trgt, if ( data.rankDataList.size() > 0 ) { data.header.rankCount = data.rankDataList.size(); - data.header.isEccSp = ( isDramWidthX4( i_trgt ) && - (TYPE_MBA == getTargetType(i_trgt)) ); + data.header.isEccSp = false; + if ( TYPE_MBA == getTargetType(i_trgt) ) + { + data.header.isEccSp = isDramWidthX4( i_trgt ); + } UtilMem dramStream; dramStream << data; @@ -459,6 +470,33 @@ void captureIueCounts<McaDataBundle*>( TARGETING::TargetHandle_t i_trgt, //------------------------------------------------------------------------------ template<> +void captureIueCounts<OcmbDataBundle*>( TARGETING::TargetHandle_t i_trgt, + OcmbDataBundle * i_db, + CaptureData & io_cd ) +{ + #ifdef __HOSTBOOT_MODULE + + uint8_t sz_capData = i_db->iv_iueTh.size()*2; + uint8_t capData[sz_capData] = {}; + uint8_t idx = 0; + + for ( auto & th_pair : i_db->iv_iueTh ) + { + capData[idx] = th_pair.first; + capData[idx+1] = th_pair.second.getCount(); + idx += 2; + } + + // Add data to capture data. 
+ BitString bs ( sz_capData*8, (CPU_WORD *) &capData ); + io_cd.Add( i_trgt, Util::hashString("IUE_COUNTS"), bs ); + + #endif +} + +//------------------------------------------------------------------------------ + +template<> void addEccData<TYPE_MCA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -497,6 +535,33 @@ void addEccData<TYPE_MCBIST>( ExtensibleChip * i_chip, } template<> +void addEccData<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + CaptureData & cd = io_sc.service_data->GetCaptureData(); + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add DRAM repairs data from VPD. + captureDramRepairsVpd<TYPE_OCMB_CHIP>( ocmbTrgt, cd ); + + // Add IUE counts to capture data. + captureIueCounts<OcmbDataBundle*>( ocmbTrgt, db, cd ); + + // Add CE table to capture data. + db->iv_ceTable.addCapData( cd ); + + // Add UE table to capture data. + db->iv_ueTable.addCapData( cd ); +} + +template<> void addEccData<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -558,6 +623,22 @@ void addEccData<TYPE_MBA>( TargetHandle_t i_trgt, errlHndl_t io_errl ) ErrDataService::AddCapData( cd, io_errl ); } +template<> +void addEccData<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + errlHndl_t io_errl ) +{ + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + + CaptureData cd; + + // Add DRAM repairs data from hardware. + captureDramRepairsData<TYPE_OCMB_CHIP>( i_trgt, cd ); + + // Add DRAM repairs data from VPD. 
+ captureDramRepairsVpd<TYPE_OCMB_CHIP>( i_trgt, cd ); + + ErrDataService::AddCapData( cd, io_errl ); +} //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C index 16645586b..799e32e67 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemCeTable.C @@ -281,7 +281,7 @@ void MemCeTable<T>::addCapData( CaptureData & io_cd ) // Avoid linker errors with the template. template class MemCeTable<TYPE_MCA>; template class MemCeTable<TYPE_MBA>; -template class MemCeTable<TYPE_MEM_PORT>; +template class MemCeTable<TYPE_OCMB_CHIP>; //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H index 7605a82fa..80586976e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -28,6 +28,8 @@ #include <prdfCenMbaDataBundle.H> #include <prdfP9McaDataBundle.H> +#include <prdfOcmbDataBundle.H> +#include <prdfTargetServices.H> namespace PRDF { @@ -62,6 +64,16 @@ uint32_t addCeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t addCeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + const MemSymbol & i_symbol, + bool i_isHard ) +{ + return getOcmbDataBundle(i_chip)->iv_ceTable.addEntry( i_addr, i_symbol, + i_isHard ); +} + +template<> inline uint32_t addCeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, const MemSymbol & i_symbol, @@ -91,6 +103,14 @@ void addUeTableEntry<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void addUeTableEntry<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + UE_TABLE::Type i_type, + const MemAddr & i_addr ) +{ + getOcmbDataBundle(i_chip)->iv_ueTable.addEntry( i_type, i_addr ); +} + +template<> inline void addUeTableEntry<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, UE_TABLE::Type i_type, const MemAddr & i_addr ) @@ -118,6 +138,14 @@ void resetEccFfdc<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +void resetEccFfdc<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + AddrRangeType i_type ) +{ + getOcmbDataBundle(i_chip)->iv_ceTable.deactivateRank( i_rank, i_type ); +} + +template<> inline void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, AddrRangeType i_type ) @@ -134,7 +162,7 @@ void resetEccFfdc<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, /** * @brief Generic wrapper to push a TdEntry to the Targeted Diagnostics queue. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or MEM_PORT. * @param i_entry The new TdEntry. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
*/ @@ -155,6 +183,13 @@ void pushToQueue<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); } +template<> inline +void pushToQueue<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + TdEntry * i_entry ) +{ + getOcmbDataBundle(i_chip)->getTdCtlr()->pushToQueue( i_entry ); +} + #endif // Hostboot IPL/Runtime //############################################################################## @@ -179,6 +214,13 @@ MemIplCeStats<TARGETING::TYPE_MCA> * getIplCeStats( ExtensibleChip * i_chip ) } template<> inline +MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats( + ExtensibleChip * i_chip ) +{ + return getOcmbDataBundle(i_chip)->getIplCeStats(); +} + +template<> inline MemIplCeStats<TARGETING::TYPE_MBA> * getIplCeStats( ExtensibleChip * i_chip ) { return getMbaDataBundle(i_chip)->getIplCeStats(); @@ -211,6 +253,13 @@ uint32_t handleTdEvent<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, } template<> inline +uint32_t handleTdEvent<TARGETING::TYPE_OCMB_CHIP>(ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc) +{ + return getOcmbDataBundle(i_chip)->getTdCtlr()->handleTdEvent( io_sc ); +} + +template<> inline uint32_t handleTdEvent<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -242,6 +291,16 @@ void banTps<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, getMbaDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); } +template<> inline +void banTps<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ) +{ + // Ban TPS on this rank. + getOcmbDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank ); + // Permanently mask mainline NCEs and TCEs because of the TPS ban. 
+ getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true; +} + #endif // Hostboot Runtime only } // end namespace MemDbUtils diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C index 308e25dab..5db522818 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C @@ -450,6 +450,9 @@ std::vector<MemSymbol> MemDqBitmap::getSymbolList( uint8_t i_portSlct ) case TYPE_MEM_PORT: symbol = dq2Symbol<TYPE_MEM_PORT>( dq, i_portSlct ); break; + case TYPE_OCMB_CHIP: + symbol = dq2Symbol<TYPE_OCMB_CHIP>(dq, i_portSlct); + break; default: PRDF_ERR( "Invalid trgt type" ); PRDF_ASSERT( false ); @@ -700,7 +703,7 @@ uint32_t MemDqBitmap::setEccSpare( uint8_t i_pins ) // Utility Functions //############################################################################## -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__setDramInVpd] " @@ -709,14 +712,12 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -727,11 +728,11 @@ uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); @@ -743,7 +744,7 @@ uint32_t setDramInVpd( 
ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ) { #define PRDF_FUNC "[MemDqBitmap::__clearDramInVpd] " @@ -752,14 +753,12 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, do { - TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - MemDqBitmap dqBitmap; - o_rc = getBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = getBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } @@ -770,11 +769,11 @@ uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, break; } - o_rc = setBadDqBitmap( trgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap( i_trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", - getHuid(trgt), i_rank.getKey() ); + getHuid(i_trgt), i_rank.getKey() ); break; } }while(0); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H index b407d9835..c3648dbc5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H @@ -73,7 +73,22 @@ class MemDqBitmap /** @brief Constructor from components */ MemDqBitmap( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, BitmapData i_d ) : iv_trgt(i_trgt), iv_rank(i_rank), - iv_x4Dram(PlatServices::isDramWidthX4(i_trgt)), iv_data(i_d){} + iv_x4Dram(true), iv_data(i_d) + { + if ( TARGETING::TYPE_MEM_PORT == PlatServices::getTargetType(iv_trgt) || + TARGETING::TYPE_OCMB_CHIP == + PlatServices::getTargetType(iv_trgt) ) + { + // TODO RTC 210072 - Support multiple ports + TARGETING::TargetHandle_t dimm 
= + PlatServices::getConnectedDimm( iv_trgt, iv_rank ); + iv_x4Dram = PlatServices::isDramWidthX4( dimm ); + } + else + { + iv_x4Dram = PlatServices::isDramWidthX4( iv_trgt ); + } + } public: // functions @@ -224,7 +239,7 @@ class MemDqBitmap private: // instance variables - TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT + TARGETING::TargetHandle_t iv_trgt; ///< Target MBA/MCA/MEM_PORT/OCMB_CHIP MemRank iv_rank; ///< Target rank bool iv_x4Dram; ///< TRUE if iv_trgt uses x4 DRAMs @@ -238,20 +253,21 @@ class MemDqBitmap /** * @brief Sets the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ -uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t setDramInVpd( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, MemSymbol i_symbol ); /** * @brief Clears the inputted dram in DRAM repairs VPD. - * @param i_chip MBA or MCA chip. + * @param i_trgt MBA, MCA, MEM_PORT, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
*/ -uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, +uint32_t clearDramInVpd( TARGETING::TargetHandle_t i_trgt, + const MemRank & i_rank, MemSymbol i_symbol ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 9869a8c08..f206a074e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -127,6 +127,87 @@ uint32_t handleMemUe<TYPE_MCA>( ExtensibleChip * i_chip, const MemAddr & i_addr, i_chip->getHuid(), i_type ); break; } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + ExtensibleChip * mcb = getConnectedParent( i_chip, TYPE_MCBIST ); + McbistDataBundle * mcbdb = getMcbistDataBundle(mcb); + if ( mcbdb->iv_ceUeRank != i_addr.getRank() ) + { + mcbdb->iv_ceStopCounter.reset(); + mcbdb->iv_ueStopCounter.reset(); + } + mcbdb->iv_ueStopCounter.inc( io_sc ); + mcbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> +uint32_t handleMemUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemEcc::handleMemUe<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // First check to see if this is a side-effect UE. + SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister("OCMB_LFIR"); + o_rc = fir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on OCMB_LFIR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // Check OCMB_LFIR[38] to determine if this is a side-effect. + if ( fir->IsBitSet(38) ) + { + // This is a side-effect. Callout the OCMB. 
+ PRDF_TRAC( PRDF_FUNC "Memory UE is side-effect of DDRPHY error" ); + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + io_sc.service_data->setServiceCall(); + } + else + { + // Handle the memory UE. + o_rc = __handleMemUe<TYPE_OCMB_CHIP>( i_chip, i_addr, i_type, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__handleMemUe(0x%08x,%d) failed", + i_chip->getHuid(), i_type ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + // Increment the UE counter and store the rank we're on, resetting + // the UE and CE counts if we have stopped on a new rank. + OcmbDataBundle * ocmbdb = getOcmbDataBundle(i_chip); + if ( ocmbdb->iv_ceUeRank != i_addr.getRank() ) + { + ocmbdb->iv_ceStopCounter.reset(); + ocmbdb->iv_ueStopCounter.reset(); + } + ocmbdb->iv_ueStopCounter.inc( io_sc ); + ocmbdb->iv_ceUeRank = i_addr.getRank(); + #endif + } } while (0); @@ -328,6 +409,52 @@ uint32_t maskMemPort<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t maskMemPort<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::maskMemPort<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // Mask all FIRs on the OCMB in the chiplet FIRs. + SCAN_COMM_REGISTER_CLASS * chipletMask = + i_chip->getRegister("OCMB_CHIPLET_FIR_MASK"); + SCAN_COMM_REGISTER_CLASS * chipletSpaMask = + i_chip->getRegister("OCMB_CHIPLET_SPA_FIR_MASK"); + + chipletMask->setAllBits(); + chipletSpaMask->setAllBits(); + + o_rc = chipletMask->Write() | chipletSpaMask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", i_chip->getHuid() ); + break; + } + + #ifdef __HOSTBOOT_RUNTIME + + // Dynamically deallocate the port. 
+ if ( SUCCESS != MemDealloc::port<TYPE_OCMB_CHIP>( i_chip ) ) + { + PRDF_ERR( PRDF_FUNC "MemDealloc::port<TYPE_OCMB_CHIP>(0x%08x) " + "failed", i_chip->getHuid() ); + } + + #endif + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_MODULE //------------------------------------------------------------------------------ @@ -390,6 +517,62 @@ uint32_t triggerPortFail<TYPE_MCA>( ExtensibleChip * i_chip ) #undef PRDF_FUNC } +template<> +uint32_t triggerPortFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) +{ + #define PRDF_FUNC "[MemEcc::triggerPortFail<TYPE_OCMB_CHIP>] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + do + { + // trigger a port fail + // set FARB0[59] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_CONSTANT and + // FARB0[40] - MBA_FARB0Q_CFG_INJECT_PARITY_ERR_ADDR5 + SCAN_COMM_REGISTER_CLASS * farb0 = i_chip->getRegister("FARB0"); + + o_rc = farb0->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + farb0->SetBit(59); + farb0->SetBit(40); + + o_rc = farb0->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() FARB0 failed: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + // reset thresholds to prevent issuing multiple port failures on + // the same port + for ( auto & resetTh : db->iv_iueTh ) + { + resetTh.second.reset(); + } + + db->iv_iuePortFail = true; + + break; + }while(0); + + + return o_rc; + + #undef PRDF_FUNC +} + #endif // __HOSTBOOT_RUNTIME //------------------------------------------------------------------------------ @@ -420,6 +603,30 @@ bool queryIueTh<TYPE_MCA>( ExtensibleChip * i_chip, return iueAtTh; } +template<> +bool queryIueTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == 
i_chip->getType() ); + + bool iueAtTh = false; + + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // Loop through all our thresholds + for ( auto & th : db->iv_iueTh ) + { + // If threshold reached + if ( th.second.thReached(io_sc) ) + { + iueAtTh = true; + } + } + + return iueAtTh; +} + #endif //------------------------------------------------------------------------------ @@ -493,6 +700,11 @@ template uint32_t handleMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr, UE_TABLE::Type i_type, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t handleMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemAddr & i_addr, + UE_TABLE::Type i_type, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -581,6 +793,10 @@ template uint32_t analyzeFetchMpe<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchMpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -794,6 +1010,9 @@ uint32_t analyzeFetchNceTce<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchNceTce<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchNceTce<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -871,6 +1090,9 @@ uint32_t analyzeFetchUe<TYPE_MCA>( ExtensibleChip * i_chip, template uint32_t analyzeFetchUe<TYPE_MBA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchUe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -955,16 +1177,97 @@ uint32_t handleMemIue<TYPE_MCA>( ExtensibleChip * 
i_chip, #undef PRDF_FUNC } -//------------------------------------------------------------------------------ - template<> -uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, +uint32_t handleMemIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { + #define PRDF_FUNC "[MemEcc::handleMemIue] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + // Add the DIMM to the callout list. + MemoryMru mm { i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm ); + + #ifdef __HOSTBOOT_MODULE + + do + { + // Nothing else to do if handling a system checkstop. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the data bundle from chip. + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + + // If we have already caused a port fail, mask the IUE bits. + if ( true == db->iv_iuePortFail ) + { + SCAN_COMM_REGISTER_CLASS * mask_or = + i_chip->getRegister("RDFFIR_MASK_OR"); + + mask_or->SetBit(17); + mask_or->SetBit(37); + + o_rc = mask_or->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", + i_chip->getHuid() ); + break; + } + } + + // Get the DIMM select. + uint8_t ds = i_rank.getDimmSlct(); + + // Initialize threshold if it doesn't exist yet. + if ( 0 == db->iv_iueTh.count(ds) ) + { + db->iv_iueTh[ds] = TimeBasedThreshold( getIueTh() ); + } + + // Increment the count and check if at threshold. + if ( db->iv_iueTh[ds].inc(io_sc) ) + { + // Make the error log predictive. + io_sc.service_data->setServiceCall(); + + // The port fail will be triggered in the PostAnalysis plugin after + // the error log has been committed. + + // Mask off the entire port to avoid collateral. 
+ o_rc = MemEcc::maskMemPort<TYPE_OCMB_CHIP>( i_chip ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort(0x%08x) failed", + i_chip->getHuid() ); + break; + } + } + + } while (0); + + #endif // __HOSTBOOT_MODULE + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ #define PRDF_FUNC "[MemEcc::analyzeMainlineIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -974,7 +1277,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, // not likely that we will have two independent failure modes at the // same time. So we just assume the address is correct. MemAddr addr; - o_rc = getMemReadAddr<TYPE_MCA>( i_chip, MemAddr::READ_RCE_ADDR, addr ); + o_rc = getMemReadAddr<T>( i_chip, MemAddr::READ_RCE_ADDR, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_RCE_ADDR) failed", @@ -983,7 +1286,7 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -998,16 +1301,23 @@ uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMainlineIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMainlineIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ -template<> -uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & io_sc ) +template<TARGETING::TYPE T> 
+uint32_t analyzeMaintIue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[MemEcc::analyzeMaintIue] " PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( T == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -1015,7 +1325,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, { // Use the current address in the MCBMCAT. MemAddr addr; - o_rc = getMemMaintAddr<TYPE_MCA>( i_chip, addr ); + o_rc = getMemMaintAddr<T>( i_chip, addr ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", @@ -1024,7 +1334,7 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, } MemRank rank = addr.getRank(); - o_rc = handleMemIue<TYPE_MCA>( i_chip, rank, io_sc ); + o_rc = handleMemIue<T>( i_chip, rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,m%ds%d) failed", @@ -1039,6 +1349,13 @@ uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t analyzeMaintIue<TYPE_MCA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeMaintIue<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + //------------------------------------------------------------------------------ template<> @@ -1152,6 +1469,117 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t analyzeImpe<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + + #define PRDF_FUNC "[MemEcc::analyzeImpe] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // get the mark shadow register + SCAN_COMM_REGISTER_CLASS * msr = i_chip->getRegister("EXP_MSR"); + + o_rc = msr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on EXP_MSR: i_chip=0x%08x", + i_chip->getHuid() ); + break; + } + + TargetHandle_t trgt = i_chip->getTrgt(); + + // get galois 
field code - bits 8:15 of MSR + uint8_t galois = msr->GetBitFieldJustified( 8, 8 ); + + // get rank - bits 16:18 of MSR + uint8_t mrnk = msr->GetBitFieldJustified( 16, 3 ); + MemRank rank( mrnk ); + + // get symbol and DRAM + MemSymbol symbol = MemSymbol::fromGalois( trgt, rank, galois ); + if ( !symbol.isValid() ) + { + PRDF_ERR( PRDF_FUNC "Galois 0x%02x from EXP_MSR is invalid: 0x%08x," + "0x%02x", galois, i_chip->getHuid(), rank.getKey() ); + o_rc = FAIL; + break; + } + + // Add the DIMM to the callout list + MemoryMru memmru( trgt, rank, MemoryMruData::CALLOUT_RANK ); + io_sc.service_data->SetCallout( memmru ); + + #ifdef __HOSTBOOT_MODULE + // get data bundle from chip + OcmbDataBundle * db = getOcmbDataBundle( i_chip ); + uint8_t dram = symbol.getDram(); + + // Increment the count and check threshold. + if ( db->getImpeThresholdCounter()->inc(rank, dram, io_sc) ) + { + // Make the error log predictive if DRAM Repairs are disabled or if + // the number of DRAMs on this rank with IMPEs has reached threshold + if ( areDramRepairsDisabled() || + db->getImpeThresholdCounter()->queryDrams(rank, dram, io_sc) ) + { + io_sc.service_data->setServiceCall(); + } + else // Otherwise, place a chip mark on the failing DRAM. + { + MemMark chipMark( trgt, rank, galois ); + o_rc = MarkStore::writeChipMark<TYPE_OCMB_CHIP>( i_chip, rank, + chipMark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + + o_rc = MarkStore::chipMarkCleanup<TYPE_OCMB_CHIP>( i_chip, rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed", + i_chip->getHuid(), rank.getKey() ); + break; + } + } + } + + // If a predictive callout is made, mask both mainline and maintenance + // attentions. 
+ if ( io_sc.service_data->queryServiceCall() ) + { + SCAN_COMM_REGISTER_CLASS * mask + = i_chip->getRegister( "RDFFIR_MASK_OR" ); + mask->SetBit(19); // mainline + mask->SetBit(39); // maintenance + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on RDFFIR_MASK_OR: " + "0x%08x", i_chip->getHuid() ); + break; + } + } + #endif // __HOSTBOOT_MODULE + + } while (0); + + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H index 735ae436f..0fd71dd8b 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -51,7 +51,7 @@ namespace MemEcc * @brief Adds the memory CE to the callout list and CE table. Will also issue * dynamic memory deallocation when appropriate. Returns true if TPS is * required. - * @param i_chip MCA or MBA. + * @param i_chip MCA, MBA, or OCMB. * @param i_addr Failed address. * @param i_symbol Failed symbol. * @param o_doTps True if TPS is required. False otherwise. @@ -74,7 +74,7 @@ uint32_t handleMemCe( ExtensibleChip * i_chip, const MemAddr & i_addr, * of the DIMMs, the UE table will not be updated and no dynamic memory * deallocation. * - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -96,7 +96,7 @@ uint32_t handleMemUe( ExtensibleChip * i_chip, const MemAddr & i_addr, * the port failure is issued in the PostAnalysis plugin after the error log has * been committed. 
* - * @param i_chip MCA chip. + * @param i_chip MCA or OCMB chip. * @param i_rank Rank containing the IUE. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. @@ -107,7 +107,7 @@ uint32_t handleMemIue( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_addr Failed address. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -119,7 +119,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemAddr & i_addr, /** * @brief Handles a MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param i_type The type of UE. * @param io_sc The step code data struct. @@ -135,7 +135,7 @@ uint32_t handleMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch MPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. @@ -146,7 +146,7 @@ uint32_t analyzeFetchMpe( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Analyzes a fetch NCE/TCE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -156,7 +156,7 @@ uint32_t analyzeFetchNceTce( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch UE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -166,7 +166,7 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch mainline IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. 
* @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -177,7 +177,7 @@ uint32_t analyzeMainlineIue( ExtensibleChip * i_chip, /** * @brief Analyzes a fetch maint IUE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an interal function fails, SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip, /** * @brief Analyzes a maint or mainline IMPE attention. - * @param i_chip MCA or MBA. + * @param i_chip MCA, OCMB, or MBA. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. */ @@ -208,7 +208,7 @@ uint32_t analyzeFetchRcePue( ExtensibleChip * i_chip, /** * @brief Will trigger a port fail. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> @@ -221,7 +221,7 @@ uint32_t triggerPortFail( ExtensibleChip * i_chip ); /** * @brief Will query the data bundle and return if the IUE threshold has been * reached. - * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @param io_sc The step code data struct. * @return True if IUE threshold is reached, false if not. */ @@ -231,7 +231,7 @@ bool queryIueTh( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); /** * @brief Will mask off an entire memory port. At runtime will issue dynamic * memory deallocation of the port. 
- * @param i_chip MCA chip + * @param i_chip MCA/OCMB chip * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise */ template<TARGETING::TYPE T> diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H index 08b79922e..7bcf0e573 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H @@ -88,7 +88,7 @@ PRDR_ERROR_SIGNATURE(VttLost, 0xffff0084, "", "NVDIMM VTT Lost"); PRDR_ERROR_SIGNATURE(NotSelfRefr, 0xffff0085, "", "NVDIMM Dram Not Self Refresh"); PRDR_ERROR_SIGNATURE(CtrlHwErr, 0xffff0086, "", "NVDIMM Controller Hardware Error"); PRDR_ERROR_SIGNATURE(NvmCtrlErr, 0xffff0087, "", "NVDIMM NVM Controller Error"); -PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM NVM Lifetime Error"); +PRDR_ERROR_SIGNATURE(NvmLifeErr, 0xffff0088, "", "NVDIMM Final NVM Lifetime Error"); PRDR_ERROR_SIGNATURE(InsuffEnergy, 0xffff0089, "", "NVDIMM Not enough energy for CSAVE"); PRDR_ERROR_SIGNATURE(InvFwErr, 0xffff008A, "", "NVDIMM Invalid Firmware Error"); @@ -98,8 +98,22 @@ PRDR_ERROR_SIGNATURE(EsPolNotSet, 0xffff008D, "", "NVDIMM Energy Source Policy PRDR_ERROR_SIGNATURE(EsHwFail, 0xffff008E, "", "NVDIMM Energy Source Hardware Fail"); PRDR_ERROR_SIGNATURE(EsHlthAssess, 0xffff008F, "", "NVDIMM Energy Source Health Assessment Error"); -PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Energy Source Lifetime Error"); -PRDR_ERROR_SIGNATURE(EsTmpErr, 0xffff0091, "", "NVDIMM Energy Source Temp Error"); +PRDR_ERROR_SIGNATURE(EsLifeErr, 0xffff0090, "", "NVDIMM Final Energy Source Lifetime Error"); +PRDR_ERROR_SIGNATURE(EsTmpErrHigh, 0xffff0091, "", "NVDIMM Energy Source Temperature Error - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpErrLow, 0xffff0092, "", "NVDIMM Energy Source Temperature Error - Low Temp Threshold"); + +PRDR_ERROR_SIGNATURE(NvmLifeWarn1, 0xffff0093, "", "NVDIMM First NVM Lifetime Warning"); 
+PRDR_ERROR_SIGNATURE(NvmLifeWarn2, 0xffff0094, "", "NVDIMM Second NVM Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn1, 0xffff0095, "", "NVDIMM First Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsLifeWarn2, 0xffff0096, "", "NVDIMM Second Energy Source Lifetime Warning"); +PRDR_ERROR_SIGNATURE(EsTmpWarnHigh, 0xffff0097, "", "NVDIMM Energy Source Temperature Warning - High Temp Threshold"); +PRDR_ERROR_SIGNATURE(EsTmpWarnLow, 0xffff0098, "", "NVDIMM Energy Source Temperature Warning - Low Temp Threshold"); +PRDR_ERROR_SIGNATURE(BelowWarnTh, 0xffff0099, "", "NVDIMM Below Warning Threshold"); +PRDR_ERROR_SIGNATURE(IntNvdimmErr, 0xffff009A, "", "NVDIMM Intermittent error"); +PRDR_ERROR_SIGNATURE(NotifStatErr, 0xffff009B, "", "NVDIMM Set Event Notification Status Error"); +PRDR_ERROR_SIGNATURE(FirEvntGone, 0xffff009C, "", "NVDIMM Event Triggering the FIR no longer present"); +PRDR_ERROR_SIGNATURE(EsTmpWarnFa, 0xffff009D, "", "NVDIMM Energy Source Temperature Warning - False Alarm"); +PRDR_ERROR_SIGNATURE(EsTmpErrFa, 0xffff009E, "", "NVDIMM Energy Source Temperature Error - False Alarm"); #endif // __prdfMemExtraSig_H diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C index 83bff1876..e43d844c4 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C @@ -46,7 +46,7 @@ namespace MarkStore { //############################################################################## -// Utilities to read/write markstore (MCA) +// Utilities to read/write markstore //############################################################################## // - We have the ability to set chip marks via the FWMSx registers, but there @@ -62,15 +62,19 @@ namespace MarkStore // mark per master rank. This matches the P8 behavior. This could be improved // upon later if we have the time, but doubtful. 
// - Summary: -// - Chip marks will use HWMS0-7 registers (0x07010AD0-0x07010AD7). -// - Symbol marks will use FWMS0-7 registers (0x07010AD8-0x07010ADF). +// - Chip marks will use HWMS0-7 registers: +// Nimbus: (0x07010AD0-0x07010AD7) +// Axone: (0x08011C10-0x08011C17) +// - Symbol marks will use FWMS0-7 registers: +// Nimbus: (0x07010AD8-0x07010ADF) +// Axone: (0x08011C18-0x08011C1F) // - Each register maps to master ranks 0-7. -template<> -uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + MemMark & o_mark ) { - #define PRDF_FUNC "[readChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[readChipMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -110,14 +114,21 @@ uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeChipMark<T>] " PRDF_ASSERT( i_mark.isValid() ); @@ -153,13 +164,21 @@ uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + 
//------------------------------------------------------------------------------ -template<> -uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearChipMark<T>] " uint32_t o_rc = SUCCESS; @@ -185,13 +204,20 @@ uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template +uint32_t clearChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //------------------------------------------------------------------------------ -template<> -uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readSymbolMark( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ) { - #define PRDF_FUNC "[readSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[readSymbolMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -247,14 +273,21 @@ uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeSymbolMark<T>] " PRDF_ASSERT( i_mark.isValid() 
); @@ -294,36 +327,47 @@ uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, msName, i_chip->getHuid() ); } - // Nimbus symbol mark performance workaround - // When a symbol mark is placed at runtime - #ifdef __HOSTBOOT_RUNTIME + // Nimbus only symbol mark performance workaround + if ( T == TYPE_MCA ) + { + // When a symbol mark is placed at runtime + #ifdef __HOSTBOOT_RUNTIME - // Trigger WAT logic to 'disable bypass' - // Get the ECC Debug/WAT Control register - SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); + // Trigger WAT logic to 'disable bypass' + // Get the ECC Debug/WAT Control register + SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); - // Set DBGR[8] = 0b1 - dbgr->SetBit( 8 ); - o_rc = dbgr->Write(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", - i_chip->getHuid() ); + // Set DBGR[8] = 0b1 + dbgr->SetBit( 8 ); + o_rc = dbgr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", + i_chip->getHuid() ); + } + #endif } - #endif return o_rc; #undef PRDF_FUNC } +template +uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearSymbolMark<T>] " uint32_t o_rc = SUCCESS; @@ -349,6 +393,13 @@ uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template 
+uint32_t clearSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //############################################################################## // Utilities to read/write markstore (MBA) //############################################################################## @@ -958,7 +1009,7 @@ void __addCallout( ExtensibleChip * i_chip, const MemRank & i_rank, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, +uint32_t __addRowRepairCallout( TargetHandle_t i_trgt, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -967,7 +1018,7 @@ uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, uint32_t o_rc = SUCCESS; // Get the dimms on this rank on either port. - TargetHandleList dimmList = getConnectedDimms( i_chip->getTrgt(), i_rank ); + TargetHandleList dimmList = getConnectedDimms( i_trgt, i_rank ); // Check for row repairs on each dimm. 
for ( auto const & dimm : dimmList ) @@ -1073,8 +1124,8 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, __addCallout( i_chip, i_rank, ecc, io_sc ); // Add the row repairs to the callout list if they exist - o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( i_chip, i_rank, - io_sc ); + o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( + i_chip->getTrgt(), i_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " @@ -1136,6 +1187,125 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t __applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + const MemMark & i_chipMark, + const MemMark & i_symMark, + TdEntry * & o_dsdEvent, + bool & o_allRepairsUsed ) +{ + #define PRDF_FUNC "[__applyRasPolicies<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + do + { + const uint8_t ps = i_chipMark.getSymbol().getPortSlct(); + const uint8_t dram = i_chipMark.getSymbol().getDram(); + + TargetHandle_t memPort = getConnectedChild( i_chip->getTrgt(), + TYPE_MEM_PORT, ps ); + + TargetHandle_t dimmTrgt = getConnectedDimm( memPort, i_rank, ps ); + + const bool isX4 = isDramWidthX4( dimmTrgt ); + + // Determine if DRAM sparing is enabled. + bool isEnabled = false; + o_rc = isDramSparingEnabled<TYPE_MEM_PORT>( memPort, i_rank, ps, + isEnabled ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "isDramSparingEnabled() failed." ); + break; + } + + if ( isEnabled ) + { + // Sparing is enabled. Get the current spares in hardware. + MemSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux<TARGETING::TYPE_OCMB_CHIP>( i_chip->getTrgt(), + i_rank, sp0, sp1, + ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Add the spares to the callout list if they exist. 
+ __addCallout( i_chip, i_rank, sp0, io_sc ); + __addCallout( i_chip, i_rank, sp1, io_sc ); + __addCallout( i_chip, i_rank, ecc, io_sc ); + + // Add the row repairs to the callout list if they exist + o_rc = __addRowRepairCallout<TARGETING::TYPE_OCMB_CHIP>( memPort, + i_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " + "failed.", i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // If the chip mark is on a spare then the spare is bad and hardware + // can not steer it to another DRAM even if one is available (e.g. + // the ECC spare). In this this case, make error log predictive. + if ( ( (0 == ps) && sp0.isValid() && (dram == sp0.getDram()) ) || + ( (1 == ps) && sp1.isValid() && (dram == sp1.getDram()) ) || + ( isX4 && ecc.isValid() && (dram == ecc.getDram()) ) ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_VcmBadSpare ); + break; // Nothing more to do. + } + + // Certain DIMMs may have had spares intentially made unavailable by + // the manufacturer. Check the VPD for available spares. + bool spAvail, eccAvail; + o_rc = isSpareAvailable<TYPE_MEM_PORT>( memPort, i_rank, + ps, spAvail, eccAvail ); + if ( spAvail ) + { + // A spare DRAM is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark }; + } + else if ( eccAvail ) + { + // The ECC spare is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark, true }; + } + else + { + // Chip mark is in place and sparing is not possible. + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + } + // There is no DRAM sparing so simply check if both the chip and symbol + // mark have been used. 
+ else if ( i_chipMark.isValid() && i_symMark.isValid() ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1220,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, { io_sc.service_data->setServiceCall(); + // We want to try to avoid garding NVDIMMs, so clear gard for them now. + io_sc.service_data->clearNvdimmMruListGard(); + #ifdef __HOSTBOOT_RUNTIME // No more repairs left so no point doing any more TPS procedures. MemDbUtils::banTps<T>( i_chip, i_rank ); @@ -1241,6 +1414,11 @@ uint32_t applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc, TdEntry * & o_dsdEvent ); +template +uint32_t applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + TdEntry * & o_dsdEvent ); //------------------------------------------------------------------------------ @@ -1290,7 +1468,8 @@ uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank, // Set the chip mark in the DRAM Repairs VPD. 
if ( !areDramRepairsDisabled() ) { - o_rc = setDramInVpd( i_chip, i_rank, chipMark.getSymbol() ); + o_rc = setDramInVpd( i_chip->getTrgt(), i_rank, + chipMark.getSymbol() ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed", @@ -1314,6 +1493,10 @@ template uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t chipMarkCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); #endif // not supported on FSP diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H index 2cd28b8dd..86ffa1dc9 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2018 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -57,7 +57,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_galois The Galois field. */ @@ -68,7 +68,7 @@ class MemMark /** * @brief Constructor from components. - * @param i_trgt MBA or MCA target. + * @param i_trgt MBA, MCA, or OCMB target. * @param i_rank The rank this mark is on. * @param i_symbol The symbol representing this mark. */ @@ -112,7 +112,7 @@ namespace MarkStore /** * @brief Reads markstore and returns the chip mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
@@ -123,7 +123,7 @@ uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a chip mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target chip mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -134,7 +134,7 @@ uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear chip mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -143,7 +143,7 @@ uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ); /** * @brief Reads markstore and returns the symbol mark for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param o_mark The returned symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -154,7 +154,7 @@ uint32_t readSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Writes a symbol mark into markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param i_mark Target symbol mark. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. @@ -165,7 +165,7 @@ uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Clear symbol mark in markstore for the given rank. - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. */ @@ -187,7 +187,7 @@ uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ); * repairs have been used. * - Returns a new DsdEvent if DRAM sparing is available. 
* - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @param o_dsdEvent A new DsdEvent if DRAM sparing is available. Otherwise, @@ -211,7 +211,7 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, * - Sets the DRAM in the DRAM Repair VPD if DRAM repairs. * - Adds a DSD procedure to the TD queue if a DRAM spare is available * - * @param i_chip MBA or MCA chip. + * @param i_chip MBA, MCA, or OCMB chip. * @param i_rank Target rank. * @param io_sc The step code data struct. * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C index 8ebe6cea8..3ff6cd099 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemRowRepair.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2018 */ +/* Contributors Listed Below - COPYRIGHT 2018,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -126,6 +126,22 @@ uint32_t getRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, o_rowRepair ); } +template<> +uint32_t getRowRepairData<TYPE_MEM_PORT>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_MEM_PORT, fapi2::TARGET_TYPE_MEM_PORT>( + i_dimm, i_rank, o_rowRepair ); +} + +template<> +uint32_t getRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, MemRowRepair & o_rowRepair ) +{ + return __getRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, o_rowRepair ); +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T, fapi2::TargetType F> @@ -190,34 +206,19 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, i_rowRepair ); } -//------------------------------------------------------------------------------ - -template<TARGETING::TYPE T> -void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ); - template<> -void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemRowRepair & i_rowRepair ) { - #ifdef __HOSTBOOT_MODULE - - // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. - // bank group - 2 bits (bg1-bg0) - io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); - - // bank - 3 bits (b2-b0) - io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); - - // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
- // row - 18 bits (r17-r0) - io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); - - #endif // __HOSTBOOT_MODULE + return __setRowRepairData<TYPE_OCMB_CHIP, fapi2::TARGET_TYPE_OCMB_CHIP>( + i_dimm, i_rank, i_rowRepair ); } -template<> -void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, - uint32_t & io_tmp ) +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +void __setRowRepairDataHelper( const MemAddr & i_addr, uint32_t & io_tmp ) { #ifdef __HOSTBOOT_MODULE @@ -242,6 +243,32 @@ void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, #endif // __HOSTBOOT_MODULE } +template +void __setRowRepairDataHelper<TYPE_MCA>( const MemAddr & i_addr, + uint32_t & io_tmp ); +template +void __setRowRepairDataHelper<TYPE_OCMB_CHIP>( const MemAddr & i_addr, + uint32_t & io_tmp ); + +template<> +void __setRowRepairDataHelper<TYPE_MBA>( const MemAddr & i_addr, + uint32_t & io_tmp ) +{ + #ifdef __HOSTBOOT_MODULE + + // Bank is stored as MBA "(DDR4): bg1-bg0,b1-b0 (4-bit)" in a MemAddr. + // bank group - 2 bits (bg1-bg0) + io_tmp = ( io_tmp << 2 ) | ( (i_addr.getBank() >> 2) & 0x03 ); + + // bank - 3 bits (b2-b0) + io_tmp = ( io_tmp << 3 ) | ( i_addr.getBank() & 0x03 ); + + // Row is stored as "MBA: r17-r0 (18-bit)" in a MemAddr. 
+ // row - 18 bits (r17-r0) + io_tmp = ( io_tmp << 18 ) | ( i_addr.getRow() & 0x0003ffff ); + + #endif // __HOSTBOOT_MODULE +} //------------------------------------------------------------------------------ @@ -297,7 +324,7 @@ uint32_t setRowRepairData( TargetHandle_t i_dimm, MemRowRepair l_rowRepair( i_dimm, i_rank, l_data ); - o_rc = setRowRepairData<TYPE_MBA>( i_dimm, i_rank, l_rowRepair ); + o_rc = setRowRepairData<T>( i_dimm, i_rank, l_rowRepair ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setRowRepairData() failed" ); @@ -323,6 +350,11 @@ uint32_t setRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank, const MemAddr & i_addr, uint8_t i_dram ); +template +uint32_t setRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank, + const MemAddr & i_addr, + uint8_t i_dram ); //------------------------------------------------------------------------------ @@ -362,6 +394,9 @@ uint32_t clearRowRepairData<TYPE_MBA>( TargetHandle_t i_dimm, template uint32_t clearRowRepairData<TYPE_MCA>( TargetHandle_t i_dimm, const MemRank & i_rank ); +template +uint32_t clearRowRepairData<TYPE_OCMB_CHIP>( TargetHandle_t i_dimm, + const MemRank & i_rank ); //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C index 561c11dda..d58d6a177 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.C @@ -53,7 +53,7 @@ MemSymbol::MemSymbol( TARGETING::TargetHandle_t i_trgt, const MemRank & i_rank, PRDF_ASSERT( nullptr != i_trgt ); PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) || TYPE_MCA == getTargetType(i_trgt) || - TYPE_MEM_PORT == getTargetType(i_trgt) ); + TYPE_OCMB_CHIP == getTargetType(i_trgt) ); // Allowing an invalid symbol. Use isValid() to check validity. 
PRDF_ASSERT( i_pins <= CEN_SYMBOL::BOTH_SYMBOL_DQS ); } @@ -83,9 +83,9 @@ MemSymbol MemSymbol::fromGalois( TargetHandle_t i_trgt, const MemRank & i_rank, if ( 0 != (i_mask & 0xaa) ) pins |= EVEN_SYMBOL_DQ; if ( 0 != (i_mask & 0x55) ) pins |= ODD_SYMBOL_DQ; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { - // 1 pin for MCA/MEM_PORT. + // 1 pin for MCA/TYPE_OCMB_CHIP. if ( 0 != (i_mask & 0xff) ) pins |= ODD_SYMBOL_DQ; } else @@ -112,9 +112,9 @@ uint8_t MemSymbol::getDq() const { dq = symbol2Dq<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dq = symbol2Dq<TYPE_MEM_PORT>( iv_symbol ); + dq = symbol2Dq<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -140,9 +140,9 @@ uint8_t MemSymbol::getPortSlct() const { portSlct = symbol2PortSlct<TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - portSlct = symbol2PortSlct<TYPE_MEM_PORT>( iv_symbol ); + portSlct = symbol2PortSlct<TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -159,22 +159,26 @@ uint8_t MemSymbol::getDram() const { uint8_t dram = 0; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; if ( TYPE_MBA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MBA>( iv_symbol ) : symbol2Byte <TYPE_MBA>( iv_symbol ); } else if ( TYPE_MCA == trgtType ) { + isX4 = isDramWidthX4( iv_trgt ); dram = isX4 ? symbol2Nibble<TYPE_MCA>( iv_symbol ) : symbol2Byte <TYPE_MCA>( iv_symbol ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( iv_symbol ) - : symbol2Byte <TYPE_MEM_PORT>( iv_symbol ); + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + dram = isX4 ? 
symbol2Nibble<TYPE_OCMB_CHIP>( iv_symbol ) + : symbol2Byte <TYPE_OCMB_CHIP>( iv_symbol ); } else { @@ -200,14 +204,24 @@ uint8_t MemSymbol::getDramRelCenDqs() const const uint8_t X4_DRAM_SPARE_UPPER = 19; const uint8_t X8_DRAM_SPARE = 9; + bool isX4 = true; + if ( TYPE_OCMB_CHIP == getTargetType(iv_trgt) ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } - uint8_t l_dramWidth = ( isDramWidthX4(iv_trgt) ) ? 4 : 8; + uint8_t l_dramWidth = ( isX4 ) ? 4 : 8; uint8_t l_dram = getDq() / l_dramWidth; // (x8: 0-9, x4: 0-19) // Adjust for spares if ( isDramSpared() ) { - if ( isDramWidthX4(iv_trgt) ) + if ( isX4 ) { uint8_t l_bit = getDq() % DQS_PER_BYTE; l_dram = ( l_bit < 4 ) ? X4_DRAM_SPARE_LOWER : X4_DRAM_SPARE_UPPER; @@ -219,7 +233,7 @@ uint8_t MemSymbol::getDramRelCenDqs() const } else if ( isEccSpared() ) { - l_dram = ( isDramWidthX4(iv_trgt) ) ? X4_ECC_SPARE : X8_ECC_SPARE; + l_dram = ( isX4 ) ? X4_ECC_SPARE : X8_ECC_SPARE; } return l_dram; @@ -231,7 +245,16 @@ uint8_t MemSymbol::getDramRelCenDqs() const uint8_t MemSymbol::getDramPins() const { TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint32_t dps = 0; uint32_t spd = 0; @@ -241,7 +264,7 @@ uint8_t MemSymbol::getDramPins() const dps = MBA_DQS_PER_SYMBOL; spd = isX4 ? MBA_SYMBOLS_PER_NIBBLE : MBA_SYMBOLS_PER_BYTE; } - else if ( TYPE_MCA == trgtType || TYPE_MEM_PORT == trgtType ) + else if ( TYPE_MCA == trgtType || TYPE_OCMB_CHIP == trgtType ) { dps = MEM_DQS_PER_SYMBOL; spd = isX4 ? 
MEM_SYMBOLS_PER_NIBBLE : MEM_SYMBOLS_PER_BYTE; @@ -261,7 +284,16 @@ uint8_t MemSymbol::getDramSymbol() const { uint8_t dramSymbol = SYMBOLS_PER_RANK; TYPE trgtType = getTargetType( iv_trgt ); - bool isX4 = isDramWidthX4( iv_trgt ); + bool isX4 = true; + if ( TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t dimm = getConnectedDimm(iv_trgt, iv_rank, getPortSlct()); + isX4 = isDramWidthX4( dimm ); + } + else + { + isX4 = isDramWidthX4( iv_trgt ); + } uint8_t dram = getDram(); if ( TYPE_MBA == trgtType ) @@ -274,10 +306,10 @@ uint8_t MemSymbol::getDramSymbol() const dramSymbol = isX4 ? nibble2Symbol<TYPE_MCA>( dram ) : byte2Symbol <TYPE_MCA>( dram ); } - else if ( TYPE_MEM_PORT == trgtType ) + else if ( TYPE_OCMB_CHIP == trgtType ) { - dramSymbol = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( dram ) - : byte2Symbol <TYPE_MEM_PORT>( dram ); + dramSymbol = isX4 ? nibble2Symbol<TYPE_OCMB_CHIP>( dram ) + : byte2Symbol <TYPE_OCMB_CHIP>( dram ); } else { @@ -435,16 +467,16 @@ uint32_t getMemReadSymbol<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MemSymbol & o_sym1, - MemSymbol & o_sym2 ) +uint32_t getMemReadSymbol<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol & o_sym1, + MemSymbol & o_sym2 ) { - #define PRDF_FUNC "[getMemReadSymbol<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[getMemReadSymbol<TYPE_OCMB_CHIP>] " // Check parameters PRDF_ASSERT( nullptr != i_chip ); - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); uint32_t o_rc = SUCCESS; @@ -453,14 +485,12 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, do { // Get the NCE/TCE galois and mask from hardware. 
- ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); - - SCAN_COMM_REGISTER_CLASS * reg = ocmbChip->getRegister("MBSEVR0"); + SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister("MBSEVR0"); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on MBSEVR0: " - "ocmbChip=0x%08x", ocmbChip->getHuid() ); + "i_chip=0x%08x", i_chip->getHuid() ); break; } @@ -480,8 +510,8 @@ uint32_t getMemReadSymbol<TYPE_MEM_PORT>( ExtensibleChip * i_chip, tceGalois, tceMask ); MemSymbol sp0, sp1, ecc; - o_rc = mssGetSteerMux<TYPE_MEM_PORT>( i_chip->getTrgt(), i_rank, - sp0, sp1, ecc ); + o_rc = mssGetSteerMux<TYPE_OCMB_CHIP>( i_chip->getTrgt(), i_rank, + sp0, sp1, ecc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. HUID: 0x%08x " diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H index c16972fd8..00b0c7cfd 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemSymbol.H @@ -79,7 +79,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a symbol. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_symbol The input symbol. * @param i_pins See enum DqMask. @@ -95,7 +95,7 @@ class MemSymbol /** * @brief Creates a MemSymbol from a Galois field. - * @param i_trgt MBA, MCA, or MEM_PORT target. + * @param i_trgt MBA, MCA, or OCMB_CHIP target. * @param i_rank The rank this symbol is on. * @param i_galois The Galois field. * @param i_mask The bit mask. @@ -122,7 +122,7 @@ class MemSymbol MemRank getRank() const { return iv_rank; }; /** @return The port select for this symbol. Only relevant on MBA. Will - * always return 0 for MCA and MEM_PORT. */ + * always return 0 for MCA and OCMB. */ uint8_t getPortSlct() const; /** @return The DRAM index for this symbol. 
*/ @@ -218,7 +218,7 @@ class MemSymbol /** * @brief Reads the memory NCE/TCE vector trap register from hardware. - * @param i_chip MCA, MBA, or MEM_PORT. + * @param i_chip MCA, MBA, or OCMB_CHIP. * @param i_rank The rank this symbol is on. * @param o_sym1 The first symbol. Should always be valid for both NCE/TCE. * @param o_sym2 The second symbol. Only valid for TCEs. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C index f6403f219..f9c73b739 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemThresholds.C @@ -173,7 +173,8 @@ void getMnfgMemCeTh( ExtensibleChip * i_chip, const MemRank & i_rank, else { // Get DRAM size - uint8_t size = MemUtils::getDramSize<T>( i_chip, i_rank.getDimmSlct() ); + uint8_t size = MemUtils::getDramSize<T>( i_chip->getTrgt(), + i_rank.getDimmSlct() ); // Get number of ranks per DIMM select. uint8_t rankCount = getNumRanksPerDimm<T>( i_chip->getTrgt(), @@ -209,7 +210,7 @@ void getMnfgMemCeTh<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDimm ); template -void getMnfgMemCeTh<TYPE_MEM_PORT>( ExtensibleChip * i_chip, +void getMnfgMemCeTh<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, const MemRank & i_rank, uint32_t & o_cePerDram, uint32_t & o_cePerRank, uint32_t & o_cePerDimm ); @@ -236,14 +237,8 @@ uint32_t getScrubCeThreshold( ExtensibleChip * i_chip, const MemRank & i_rank ) // need these templates to avoid linker errors template -uint32_t getScrubCeThreshold<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ); -template uint32_t getScrubCeThreshold<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank ); -template -uint32_t getScrubCeThreshold<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank ); } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C index 
744e55e69..64677f1ae 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2019 */ +/* Contributors Listed Below - COPYRIGHT 2013,2020 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -31,11 +31,14 @@ // Framework includes #include <iipServiceDataCollector.h> +#include <iipSystem.h> #include <prdfExtensibleChip.H> +#include <prdfGlobal_common.H> #include <UtilHash.H> // Platform includes #include <prdfCenMbaDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfCenMembufDataBundle.H> #include <prdfCenMembufExtraSig.H> #include <prdfMemSymbol.H> @@ -224,12 +227,12 @@ int32_t collectCeStats<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MaintSymbols & o_maintStats, - MemSymbol & o_chipMark, uint8_t i_thr ) +int32_t collectCeStats<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MaintSymbols & o_maintStats, + MemSymbol & o_chipMark, uint8_t i_thr ) { - #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_MEM_PORT>] " + #define PRDF_FUNC "[MemUtils::collectCeStats<TYPE_OCMB_CHIP>] " int32_t o_rc = SUCCESS; o_chipMark = MemSymbol(); // Initially invalid. 
@@ -238,10 +241,13 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, { PRDF_ASSERT( 0 != i_thr ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - ExtensibleChip * ocmbChip = getConnectedParent(i_chip, TYPE_OCMB_CHIP); + TargetHandle_t ocmbTrgt = i_chip->getTrgt(); - const bool isX4 = isDramWidthX4(memPortTrgt); + // TODO RTC 210072 - support for multiple ports + TargetHandle_t memPortTrgt = getConnectedChild( ocmbTrgt, + TYPE_MEM_PORT, 0 ); + TargetHandle_t dimm = getConnectedDimm( memPortTrgt, i_rank ); + const bool isX4 = isDramWidthX4( dimm ); // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; @@ -252,7 +258,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ ) { reg_str = ocmbCeStatReg[regIdx]; - reg = ocmbChip->getRegister( reg_str ); + reg = i_chip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) @@ -272,8 +278,8 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, uint8_t sym = baseSymbol + i; PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - uint8_t dram = isX4 ? symbol2Nibble<TYPE_MEM_PORT>( sym ) - : symbol2Byte <TYPE_MEM_PORT>( sym ); + uint8_t dram = isX4 ? symbol2Nibble<TYPE_OCMB_CHIP>( sym ) + : symbol2Byte <TYPE_OCMB_CHIP>( sym ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; @@ -286,7 +292,7 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, dramCounts[dram].symbolCount++; SymbolData symData; - symData.symbol = MemSymbol::fromSymbol( memPortTrgt, i_rank, + symData.symbol = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym, CEN_SYMBOL::ODD_SYMBOL_DQ ); if ( !symData.symbol.isValid() ) { @@ -329,11 +335,11 @@ int32_t collectCeStats<TYPE_MEM_PORT>( ExtensibleChip * i_chip, if ( 0 != highestCount ) { - uint8_t sym = isX4 ? nibble2Symbol<TYPE_MEM_PORT>( highestDram ) - : byte2Symbol <TYPE_MEM_PORT>( highestDram ); + uint8_t sym = isX4 ? 
nibble2Symbol<TYPE_OCMB_CHIP>( highestDram ) + : byte2Symbol <TYPE_OCMB_CHIP>( highestDram ); PRDF_ASSERT( sym < SYMBOLS_PER_RANK ); - o_chipMark = MemSymbol::fromSymbol( memPortTrgt, i_rank, sym ); + o_chipMark = MemSymbol::fromSymbol( ocmbTrgt, i_rank, sym ); } } while(0); @@ -514,19 +520,18 @@ int32_t collectCeStats<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ template<> -uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MCA == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t mcaTrgt = i_chip->getTrgt(); - TargetHandle_t mcsTrgt = getConnectedParent( mcaTrgt, TYPE_MCS ); + TargetHandle_t mcsTrgt = getConnectedParent( i_trgt, TYPE_MCS ); PRDF_ASSERT( nullptr != mcsTrgt ); - uint8_t mcaRelPos = i_chip->getPos() % MAX_MCA_PER_MCS; + uint8_t mcaRelPos = getTargetPosition(i_trgt) % MAX_MCA_PER_MCS; uint8_t tmp[MAX_MCA_PER_MCS][DIMM_SLCT_PER_PORT]; @@ -542,19 +547,22 @@ uint8_t getDramSize<TYPE_MCA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + PRDF_ASSERT( TYPE_MBA == getTargetType(i_trgt) ); uint8_t o_size = 0; do { - ExtensibleChip * membufChip = getConnectedParent(i_chip, TYPE_MEMBUF); + TargetHandle_t membuf = getConnectedParent(i_trgt, TYPE_MEMBUF); + ExtensibleChip * membufChip = + (ExtensibleChip*)systemPtr->GetChip(membuf); + PRDF_ASSERT( nullptr != membufChip ); - uint32_t pos = i_chip->getPos(); + uint32_t pos = getTargetPosition(i_trgt); const char * reg_str = (0 == pos) ? 
"MBA0_MBAXCR" : "MBA1_MBAXCR"; SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str ); @@ -562,7 +570,7 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) if ( SUCCESS != rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x", - reg_str, i_chip->getHuid() ); + reg_str, getHuid(i_trgt) ); break; } @@ -579,18 +587,16 @@ uint8_t getDramSize<TYPE_MBA>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) } template<> -uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) +uint8_t getDramSize<TYPE_MEM_PORT>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) { #define PRDF_FUNC "[MemUtils::getDramSize] " - PRDF_ASSERT( TYPE_MEM_PORT == i_chip->getType() ); + PRDF_ASSERT( TYPE_MEM_PORT == getTargetType(i_trgt) ); PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); - TargetHandle_t memPortTrgt = i_chip->getTrgt(); - uint8_t tmp[DIMM_SLCT_PER_PORT]; - if ( !memPortTrgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) + if ( !i_trgt->tryGetAttr<TARGETING::ATTR_MEM_EFF_DRAM_DENSITY>(tmp) ) { PRDF_ERR( PRDF_FUNC "Failed to get ATTR_MEM_EFF_DRAM_DENSITY" ); PRDF_ASSERT( false ); @@ -601,6 +607,25 @@ uint8_t getDramSize<TYPE_MEM_PORT>(ExtensibleChip *i_chip, uint8_t i_dimmSlct) #undef PRDF_FUNC } +template<> +uint8_t getDramSize<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, uint8_t i_dimmSlct ) +{ + #define PRDF_FUNC "[MemUtils::getDramSize] " + + PRDF_ASSERT( TYPE_OCMB_CHIP == getTargetType(i_trgt) ); + PRDF_ASSERT( i_dimmSlct < DIMM_SLCT_PER_PORT ); + + // TODO RTC 210072 - Explorer only has one port, however, multiple ports + // will be supported in the future. Updates will need to be made here so we + // can get the relevant port. 
+ + TargetHandle_t memPort = getConnectedChild( i_trgt, TYPE_MEM_PORT, 0 ); + + return getDramSize<TYPE_MEM_PORT>( memPort, i_dimmSlct ); + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<> @@ -639,6 +664,34 @@ void cleanupChnlAttns<TYPE_MEMBUF>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +void cleanupChnlAttns<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::cleanupChnlAttns] " + + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) return; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + // Clear the associated FIR bits for all attention types. DSTLFIR[0:7] + ExtensibleChip * mcc = getConnectedParent( i_chip, TYPE_MCC ); + + SCAN_COMM_REGISTER_CLASS * reg = mcc->getRegister( "DSTLFIR_AND" ); + + reg->setAllBits(); + reg->SetBitFieldJustified( 0, 8, 0 ); + reg->Write(); + + #endif // Hostboot only + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1288,6 +1341,361 @@ bool analyzeChnlFail<TYPE_MC>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ +bool __queryUcsOmic( ExtensibleChip * i_omic, ExtensibleChip * i_mcc, + TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + do + { + // Get the DSTLCFG2 register to check whether channel fail is enabled + // NOTE: DSTLCFG2[22] = 0b0 to enable chnl fail for subchannel A + // NOTE: DSTLCFG2[23] = 0b0 to 
enable chnl fail for subchannel B + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // If channel fail isn't configured, no need to continue. + if ( cnfg->IsBitSet(22 + omiPosRelMcc) ) break; + + // Check the OMIDLFIR for UCS (relevant bits: 0,20,40) + SCAN_COMM_REGISTER_CLASS * fir = i_omic->getRegister("OMIDLFIR"); + SCAN_COMM_REGISTER_CLASS * mask = i_omic->getRegister("OMIDLFIR_MASK"); + SCAN_COMM_REGISTER_CLASS * act0 = i_omic->getRegister("OMIDLFIR_ACT0"); + SCAN_COMM_REGISTER_CLASS * act1 = i_omic->getRegister("OMIDLFIR_ACT1"); + + if ( SUCCESS == ( fir->Read() | mask->Read() | + act0->Read() | act1->Read() ) ) + { + // Get the position of the inputted OMI relative to the parent + // OMIC (0-2). We'll need to use ATTR_OMI_DL_GROUP_POS for this. + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + + // Get the bit offset for the bit relevant to the inputted OMI. + // 0 : OMI-DL 0 + // 20: OMI-DL 1 + // 40: OMI-DL 2 + uint8_t bitOff = omiPosRelOmic * 20; + + // Check if there is a UNIT_CS for the relevant bits in the OMIDLFIR + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. 
+ if ( fir->IsBitSet(bitOff) && !mask->IsBitSet(bitOff) && + !act0->IsBitSet(bitOff) && act1->IsBitSet(bitOff) ) + { + o_activeAttn = true; + } + } + }while(0); + + return o_activeAttn; +} + +bool __queryUcsMcc( ExtensibleChip * i_mcc, TargetHandle_t i_omi ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + bool o_activeAttn = false; + + // Get the position of the inputted OMI relative to the parent MCC (0-1) + // to determine which channel we need to check. + uint8_t omiPos = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Maps of the DSTLFIR UCS bits to their relevant channel fail + // configuration bit in DSTLCFG2. Ex: {12,28} = DSTLFIR[12], DSTLCFG2[28] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> dstlfirMapChanA = + { {12,28}, {16,30}, {22,24} }; + + const std::map<uint8_t,uint8_t> dstlfirMapChanB = + { {13,29}, {17,31}, {23,25} }; + + // Check the DSTLFIR for UCS + SCAN_COMM_REGISTER_CLASS * fir = i_mcc->getRegister( "DSTLFIR" ); + SCAN_COMM_REGISTER_CLASS * mask = i_mcc->getRegister( "DSTLFIR_MASK" ); + SCAN_COMM_REGISTER_CLASS * act0 = i_mcc->getRegister( "DSTLFIR_ACT0" ); + SCAN_COMM_REGISTER_CLASS * act1 = i_mcc->getRegister( "DSTLFIR_ACT1" ); + SCAN_COMM_REGISTER_CLASS * cnfg = i_mcc->getRegister( "DSTLCFG2" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> dstlfirMap; + dstlfirMap = (0 == omiPos) ? 
dstlfirMapChanA : dstlfirMapChanB; + + for ( auto const & bits : dstlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + // Maps of the USTLFIR UCS bits to their relevant channel fail + // config bit in USTLFAILMASK. Ex: {0,54} = USTLFIR[0], USTLFAILMASK[54] + // NOTE: there is a separate map for each subchannel. + const std::map<uint8_t,uint8_t> ustlfirMapChanA = + { { 0,54}, { 2,48}, {27,56}, {35,49}, {37,50}, {39,51}, {41,52}, {43,53}, + {49,55}, {51,50}, {53,50}, {55,48}, {59,56} }; + const std::map<uint8_t,uint8_t> ustlfirMapChanB = + { { 1,54}, { 3,48}, {28,56}, {36,49}, {38,50}, {40,51}, {42,52}, {44,53}, + {50,55}, {52,50}, {54,50}, {56,48}, {60,56} }; + + // Check the USTLFIR for UCS + fir = i_mcc->getRegister( "USTLFIR" ); + mask = i_mcc->getRegister( "USTLFIR_MASK" ); + act0 = i_mcc->getRegister( "USTLFIR_ACT0" ); + act1 = i_mcc->getRegister( "USTLFIR_ACT1" ); + cnfg = i_mcc->getRegister( "USTLFAILMASK" ); + + if ( SUCCESS == (fir->Read() | mask->Read() | act0->Read() | act1->Read() | + cnfg->Read()) ) + { + // Get which relevant channel we need to check. + std::map<uint8_t,uint8_t> ustlfirMap; + ustlfirMap = (0 == omiPos) ? 
ustlfirMapChanA : ustlfirMapChanB; + + for ( auto const & bits : ustlfirMap ) + { + uint8_t firBit = bits.first; + uint8_t cnfgBit = bits.second; + + // NOTE: Channel fail is enabled if the config bit is set to 0b0 + if ( !cnfg->IsBitSet(cnfgBit) && fir->IsBitSet(firBit) && + !mask->IsBitSet(firBit) && act0->IsBitSet(firBit) && + act1->IsBitSet(firBit) ) + { + o_activeAttn = true; + } + } + } + + return o_activeAttn; +} + +bool __queryUcsOcmb( ExtensibleChip * i_ocmb ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + bool o_activeAttn = false; + + // We can't use the GLOBAL_CS_FIR. It will not clear automatically when a + // channel has failed because the hardware clocks have stopped. Also, since + // it is a virtual register there really is no way to clear it. Fortunately + // we have the INTER_STATUS_REG that will tell us if there is an active + // attention. Note that we clear this register as part of the channel + // failure cleanup. So we can rely on this register to determine if there is + // a new channel failure. + + SCAN_COMM_REGISTER_CLASS * fir = i_ocmb->getRegister("INTER_STATUS_REG"); + + if ( SUCCESS == fir->Read() ) + { + o_activeAttn = fir->IsBitSet(2); // Checkstop bit. + } + + return o_activeAttn; +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> +bool __analyzeChnlFail( TargetHandle_t i_trgt, + STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +bool __analyzeChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__analyzeChnlFail<TYPE_OMI>] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + uint32_t o_analyzed = false; + + do + { + // Skip if currently analyzing a host attention. This is a required for + // a rare scenario when a channel failure occurs after PRD is called to + // handle the host attention. 
+ if ( HOST_ATTN == io_sc.service_data->getPrimaryAttnType() ) break; + + // Get the needed ExtensibleChips for analysis + TargetHandle_t ocmb = getConnectedChild( i_omi, TYPE_OCMB_CHIP, 0 ); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Do an initial query for channel fail attentions from the targets. + // This is to check whether we actually have an active channel fail + // attention before checking whether it is a side effect of some + // recoverable attention or not. + if ( !__queryUcsOmic(omicChip, mccChip, i_omi) && + !__queryUcsMcc(mccChip, i_omi) && + !__queryUcsOcmb(ocmbChip) ) + { + // If no channel fail attentions found, just break out. + break; + } + + // There was a channel fail found, so take the following actions. + + // Set the MEM_CHNL_FAIL flag in the SDC to indicate a channel failure + // has been detected and there is no need to check again. + io_sc.service_data->setMemChnlFail(); + + // Make the error log predictive and set threshold. + io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL ); + io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD ); + + // Channel failures will always send SUEs. + io_sc.service_data->setFlag( ServiceDataCollector::UERE ); + + // Indicate cleanup is required on this channel. + getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = true; + + // Check for recoverable attentions that could have a channel failure + // as a side effect. 
These include: N/A + // TODO RTC 243518 -requires more input from the test team to determine + + // Check OMIC for unit checkstops + if ( __queryUcsOmic( omicChip, mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the OMIC chip + // Note: The OMIDLFIR can't actually be set up to report UNIT_CS + // attentions, instead, as a workaround, the relevant channel fail + // bits will be set as recoverable bits and we will manually set + // the attention types to UNIT_CS in our handling of those errors. + if ( SUCCESS == omicChip->Analyze(io_sc, RECOVERABLE) ) + { + o_analyzed = true; + break; + } + } + + // Check MCC for unit checkstops + if ( __queryUcsMcc( mccChip, i_omi ) ) + { + // Analyze UNIT_CS on the MCC chip + if ( SUCCESS == mccChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + } + + // Check OCMB for unit checkstops + if ( __queryUcsOcmb( ocmbChip ) ) + { + // Analyze UNIT_CS on the OCMB chip + if ( SUCCESS == ocmbChip->Analyze(io_sc, UNIT_CS) ) + { + o_analyzed = true; + break; + } + + } + PRDF_INF( PRDF_FUNC "Failed channel detected on 0x%08x, but no active " + "attentions found", getHuid(i_omi) ); + }while(0); + + return o_analyzed; + + #undef PRDF_FUNC +} + +template<> +bool analyzeChnlFail<TYPE_MCC>( ExtensibleChip * i_mcc, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_mcc ); + PRDF_ASSERT( TYPE_MCC == i_mcc->getType() ); + + bool o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through all the connected OMIs + for ( auto & omi : getConnected(i_mcc->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OMIC>( ExtensibleChip * i_omic, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_omic ); + PRDF_ASSERT( TYPE_OMIC == i_omic->getType() ); + + bool o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + // Loop through 
all the connected OMIs + for ( auto & omi : getConnected(i_omic->getTrgt(), TYPE_OMI) ) + { + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + if ( o_analyzed ) break; + } + } + + return o_analyzed; +} + +template<> +bool analyzeChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_ocmb ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_ocmb->getType() ); + + bool o_analyzed = false; + + if ( !io_sc.service_data->isMemChnlFail() ) + { + TargetHandle_t omi = getConnectedParent( i_ocmb->getTrgt(), TYPE_OMI ); + o_analyzed = __analyzeChnlFail<TYPE_OMI>( omi, io_sc ); + } + + return o_analyzed; +} + +//------------------------------------------------------------------------------ + template<TARGETING::TYPE T1, TARGETING::TYPE T2, TARGETING::TYPE T3> void __cleanupChnlFail( ExtensibleChip * i_chip1, ExtensibleChip * i_chip2, ExtensibleChip * i_chip3, @@ -1415,6 +1823,158 @@ void cleanupChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip, cleanupChnlFail<TYPE_DMI>( dmiChip, io_sc ); } +template<TARGETING::TYPE T> +void __cleanupChnlFail( TargetHandle_t i_trgt, STEP_CODE_DATA_STRUCT & io_sc ); + +template<> +void __cleanupChnlFail<TYPE_OMI>( TargetHandle_t i_omi, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemUtils::__cleanupChnlFail] " + + PRDF_ASSERT( nullptr != i_omi ); + PRDF_ASSERT( TYPE_OMI == getTargetType(i_omi) ); + + do + { + // No cleanup if this is a checkstop attention. + if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break; + + TargetHandle_t ocmb = getConnectedChild(i_omi, TYPE_OCMB_CHIP, 0); + ExtensibleChip * ocmbChip = (ExtensibleChip *)systemPtr->GetChip(ocmb); + + // Check if cleanup is still required or has already been done. + if ( !getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup ) break; + + // Cleanup is complete and no longer required on this channel. 
+ getOcmbDataBundle(ocmbChip)->iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE // only do cleanup in Hostboot, no-op in FSP + + TargetHandle_t omic = getConnectedParent( i_omi, TYPE_OMIC ); + ExtensibleChip * omicChip = (ExtensibleChip *)systemPtr->GetChip(omic); + + TargetHandle_t mcc = getConnectedParent( i_omi, TYPE_MCC ); + ExtensibleChip * mccChip = (ExtensibleChip *)systemPtr->GetChip(mcc); + + // Get the OMI position relative to the OMIC (0,1,2) and the MCC (0,1) + uint8_t omiPosRelOmic = i_omi->getAttr<ATTR_OMI_DL_GROUP_POS>(); + uint8_t omiPosRelMcc = getTargetPosition(i_omi) % MAX_OMI_PER_MCC; + + // Note that this is a clean up function. If there are any SCOM errors + // we will just move on and try the rest. + SCAN_COMM_REGISTER_CLASS * reg = nullptr; + + // Mask off attentions from the OMIDLFIR in the OMIC based on the + // OMI position. 0-19, 20-39, 40-59 + reg = omicChip->getRegister( "OMIDLFIR_MASK_OR" ); + reg->SetBitFieldJustified( (omiPosRelOmic * 20), 20, 0xfffff ); + reg->Write(); + + // Mask off attentions from the DSTLFIR and USTLFIR in the MCC based on + // the OMI position. 
+ // DSTLFIR Generic Bits: 8,9,10,11,24,25,26,27 + uint64_t mask = 0x00f000f000000000ull; + if ( 0 == omiPosRelMcc ) + { + // DSTLFIR Subchannel A Bits: 0,1,2,3,12,14,16,18,20,22 + mask |= 0xf00aaa0000000000ull; + } + else + { + // DSTLFIR Subchannel B Bits: 4,5,6,7,13,15,17,19,21,23 + mask |= 0x0f05550000000000ull; + } + reg = mccChip->getRegister( "DSTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // USTLFIR Generic Bits: 6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21, + // 22,23,24,25,26,57,58,61,62,63 + mask = 0x03ffffe000000067ull; + if ( 0 == omiPosRelMcc ) + { + // USTLFIR Subchannel A Bits: 0,2,4,27,29,31,33,35,37,39,41,43,45, + // 47,49,51,53,55,59 + mask |= 0xa800001555555510ull; + } + else + { + // USTLFIR Subchannel B Bits: 1,3,5,28,30,32,34,36,38,40,42,44,46, + // 48,50,52,54,56,60 + mask |= 0x5400000aaaaaaa88ull; + } + reg = mccChip->getRegister( "USTLFIR_MASK_OR" ); + reg->SetBitFieldJustified( 0, 64, mask ); + reg->Write(); + + // Mask off all attentions from the chiplet FIRs in the OCMB + reg = ocmbChip->getRegister( "OCMB_CHIPLET_FIR_MASK" ); + reg->setAllBits(); // Blindly mask everything + reg->Write(); + + + // To ensure FSP ATTN doesn't think there is an active attention on this + // OCMB, manually clear the interrupt status register. + reg = ocmbChip->getRegister( "INTER_STATUS_REG" ); + reg->clearAllBits(); // Blindly clear everything + reg->Write(); + + // During runtime, send a dynamic memory deallocation message. + // During Memory Diagnostics, tell MDIA to stop pattern tests. 
+ #ifdef __HOSTBOOT_RUNTIME + MemDealloc::port<TYPE_OCMB_CHIP>( ocmbChip ); + #else + if ( isInMdiaMode() ) + { + mdiaSendEventMsg( ocmb, MDIA::STOP_TESTING ); + } + #endif + + #endif // Hostboot only + + }while(0); + + #undef PRDF_FUNC +} + +template<> +void cleanupChnlFail<TYPE_MCC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_MCC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OMIC>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OMIC == i_chip->getType() ); + + for ( auto & omi : getConnected(i_chip->getTrgt(), TYPE_OMI) ) + { + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); + } +} + +template<> +void cleanupChnlFail<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( nullptr != i_chip ); + PRDF_ASSERT( TYPE_OCMB_CHIP == i_chip->getType() ); + + TargetHandle_t omi = getConnectedParent( i_chip->getTrgt(), TYPE_OMI ); + __cleanupChnlFail<TYPE_OMI>( omi, io_sc ); +} + //------------------------------------------------------------------------------ uint64_t reverseBits( uint64_t i_val, uint64_t i_numBits ) diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H index 9759cd010..39a6051fe 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H @@ -102,12 +102,12 @@ int32_t collectCeStats( ExtensibleChip * i_chip, const MemRank & i_rank, /** * @brief Gets DRAM size for an MBA, MCA, or MEM_PORT. - * @param i_chip MBA, MCA, or MEM_PORT chip. + * @param i_trgt MBA, MCA, or MEM_PORT target. * @param i_dimmSlct DIMM select. Optional for MBA chip. 
* @return size for a DRAM */ template<TARGETING::TYPE T> -uint8_t getDramSize( ExtensibleChip * i_chip, uint8_t i_dimmSlct = 0 ); +uint8_t getDramSize( TARGETING::TargetHandle_t i_trgt, uint8_t i_dimmSlct = 0 ); /** * @brief determines the type of Centaur based raw card associated with MBA. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C index bb911847e..4cd596514 100755 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemoryMru.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2018 */ +/* Contributors Listed Below - COPYRIGHT 2013,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -70,42 +70,78 @@ MemoryMru::MemoryMru( uint32_t i_memMru ) : PRDF_ASSERT( false ); } - // If our target is MBA, get the chnlPos from the membuf - if ( 0 == iv_memMruMeld.s.isMca ) + // If our target is MCA + if ( 1 == iv_memMruMeld.s.isMca ) { - TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, + iv_target = getConnectedChild( proc, TYPE_MCA, iv_memMruMeld.s.chnlPos ); - if ( NULL == membuf ) + if ( NULL == iv_target ) { - PRDF_ERR( PRDF_FUNC "Could not find functional membuf " + PRDF_ERR( PRDF_FUNC "Could not find functional mca " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } + } + // If our target is OCMB + else if ( 1 == iv_memMruMeld.s.isOcmb ) + { + // chnlPos specifies the position of the MCC relative to the proc + TargetHandle_t mcc = getConnectedChild( proc, TYPE_MCC, + iv_memMruMeld.s.chnlPos ); + if ( nullptr == mcc ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mcc attached to " + "proc 0x%08x at pos: %u", getHuid(proc), + iv_memMruMeld.s.chnlPos ); + PRDF_ASSERT( false ); + } - iv_target = getConnectedChild( membuf, TYPE_MBA, - iv_memMruMeld.s.mbaPos ); - if ( NULL == iv_target ) + // mbaPos specifies 
the position of the OMI relative to the MCC + TargetHandle_t omi = getConnectedChild( mcc, TYPE_OMI, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == omi ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " - "to 0x%08X at pos: %u", getHuid( membuf ), - iv_memMruMeld.s.mbaPos ); + PRDF_ERR( PRDF_FUNC "Could not find functional omi attached to " + "mcc 0x%08x at pos: %u", getHuid(mcc), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + + // There is only one OCMB attached per OMI + iv_target = getConnectedChild( omi, TYPE_OCMB_CHIP, 0 ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional ocmb attached to " + "omi 0x%08x", getHuid(omi) ); PRDF_ASSERT( false ); } + + } + // If our target is MBA, get the chnlPos from the membuf else { - iv_target = getConnectedChild( proc, TYPE_MCA, + TargetHandle_t membuf = getConnectedChild( proc, TYPE_MEMBUF, iv_memMruMeld.s.chnlPos ); - if ( NULL == iv_target ) + if ( nullptr == membuf ) { - PRDF_ERR( PRDF_FUNC "Could not find functional mca " + PRDF_ERR( PRDF_FUNC "Could not find functional membuf " "attached to proc 0x%08X at pos: %u", getHuid( proc ), iv_memMruMeld.s.chnlPos ); PRDF_ASSERT( false ); } - } + iv_target = getConnectedChild( membuf, TYPE_MBA, + iv_memMruMeld.s.mbaPos ); + if ( nullptr == iv_target ) + { + PRDF_ERR( PRDF_FUNC "Could not find functional mba attached " + "to 0x%08X at pos: %u", getHuid( membuf ), + iv_memMruMeld.s.mbaPos ); + PRDF_ASSERT( false ); + } + } // Get the rank iv_rank = MemRank( iv_memMruMeld.s.mrank, iv_memMruMeld.s.srank ); @@ -247,7 +283,8 @@ TargetHandleList MemoryMru::getCalloutList() const } } } - else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) ) + else if ( TARGETING::TYPE_MCA == getTargetType(iv_target) || + TARGETING::TYPE_OCMB_CHIP == getTargetType(iv_target) ) { if ( CALLOUT_ALL_MEM == iv_special ) { @@ -304,6 +341,11 @@ void MemoryMru::getCommonVars() { proc = getConnectedParent( iv_target, TYPE_PROC ); } + else if ( 
TYPE_OCMB_CHIP == trgtType ) + { + TargetHandle_t mcc = getConnectedParent( iv_target, TYPE_MCC ); + proc = getConnectedParent( mcc, TYPE_PROC ); + } else { PRDF_ERR( PRDF_FUNC "Invalid target type" ); @@ -323,11 +365,27 @@ void MemoryMru::getCommonVars() } // If our target is an MCA, then chnlPos will specify the MCA position // and mbaPos will be an unused field - else + else if ( TYPE_MCA == getTargetType(iv_target) ) { iv_memMruMeld.s.isMca = 1; iv_memMruMeld.s.chnlPos = getTargetPosition( iv_target ); } + // If our target is an OCMB, then chnlPos will specify the MCC position and + // mbaPos will specify the OMI position. + else if ( TYPE_OCMB_CHIP == getTargetType(iv_target) ) + { + TargetHandle_t omi = getConnectedParent( iv_target, TYPE_OMI ); + TargetHandle_t mcc = getConnectedParent( omi, TYPE_MCC ); + + iv_memMruMeld.s.isOcmb = 1; + iv_memMruMeld.s.chnlPos = getTargetPosition(mcc) % MAX_MCC_PER_PROC; + iv_memMruMeld.s.mbaPos = getTargetPosition(omi) % MAX_OMI_PER_MCC; + } + else + { + PRDF_ERR( PRDF_FUNC "Invalid target type" ); + PRDF_ASSERT(false); + } iv_memMruMeld.s.nodePos = getTargetPosition( node ); iv_memMruMeld.s.procPos = getTargetPosition( proc ); diff --git a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H new file mode 100644 index 000000000..75d7dd53e --- /dev/null +++ b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H @@ -0,0 +1,247 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __prdfOcmbDataBundle_H +#define __prdfOcmbDataBundle_H + +/** @file prdfOcmbDataBundle.H + * @brief Contains the data bundle for a P9 OCMB_CHIP object. + */ + +// Framework includes +#include <prdfExtensibleChip.H> + +// Platform includes +#include <prdfPlatServices.H> +#include <prdfMemCeTable.H> +#include <prdfMemUeTable.H> + +#ifdef __HOSTBOOT_MODULE + +#include <prdfMemScrubUtils.H> +#include <prdfMemTdFalseAlarm.H> +#include <prdfMemThresholds.H> +#include <prdfMemTdCtlr.H> + +#ifndef __HOSTBOOT_RUNTIME +#include <prdfMemIplCeStats.H> +#endif + +#endif // __HOSTBOOT_MODULE + +namespace PRDF +{ + +/** @brief P9 OCMB data bundle. */ +class OcmbDataBundle : public DataBundle +{ + public: // functions + + /** + * @brief Constructor. + * @param i_ocmbChip The OCMB chip. + */ + explicit OcmbDataBundle( ExtensibleChip * i_ocmbChip ) : + iv_chip(i_ocmbChip), iv_ceTable(i_ocmbChip), iv_ueTable(i_ocmbChip) + {} + + /** @brief Destructor. Frees every lazily allocated helper object. */ + ~OcmbDataBundle() + { + #ifdef __HOSTBOOT_MODULE + #ifdef __HOSTBOOT_RUNTIME + delete iv_vcmFalseAlarmCounter; + delete iv_tpsFalseAlarmCounter; + #else // IPL only + delete iv_iplCeStats; + #endif + delete iv_tdCtlr; iv_tdCtlr = nullptr; delete iv_impeThresholdCounter; + #endif // __HOSTBOOT_MODULE + } + + // Don't allow copy or assignment. + OcmbDataBundle( const OcmbDataBundle & ) = delete; + const OcmbDataBundle & operator=( const OcmbDataBundle & ) = delete; + + #ifdef __HOSTBOOT_MODULE + + /** @return The Targeted Diagnostics controller. 
*/ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * getTdCtlr() + { + if ( nullptr == iv_tdCtlr ) + { + iv_tdCtlr = new MemTdCtlr<TARGETING::TYPE_OCMB_CHIP>{iv_chip}; + } + + return iv_tdCtlr; + } + + /** @return The IMPE threshold counter. */ + VcmFalseAlarm * getImpeThresholdCounter() + { + if ( nullptr == iv_impeThresholdCounter ) + { + iv_impeThresholdCounter = new VcmFalseAlarm( + TimeBasedThreshold { getImpeTh() } ); + } + + return iv_impeThresholdCounter; + } + + #ifdef __HOSTBOOT_RUNTIME + + /** @return The VCM false alarm counter. */ + VcmFalseAlarm * getVcmFalseAlarmCounter() + { + if ( nullptr == iv_vcmFalseAlarmCounter ) + { + iv_vcmFalseAlarmCounter = new VcmFalseAlarm( + TimeBasedThreshold { 4, ThresholdResolution::ONE_DAY } ); + } + + return iv_vcmFalseAlarmCounter; + } + + /** @return The TPS false alarm counter. */ + TpsFalseAlarm * getTpsFalseAlarmCounter() + { + if ( nullptr == iv_tpsFalseAlarmCounter ) + { + iv_tpsFalseAlarmCounter = new TpsFalseAlarm( + TimeBasedThreshold{ 3, ThresholdResolution::ONE_DAY } ); + } + + return iv_tpsFalseAlarmCounter; + } + + #else // IPL only + + /** @return The IPL CE statistics object. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * getIplCeStats() + { + if ( nullptr == iv_iplCeStats ) + { + iv_iplCeStats = + new MemIplCeStats<TARGETING::TYPE_OCMB_CHIP>( iv_chip ); + } + + return iv_iplCeStats; + } + + #endif + + #endif // __HOSTBOOT_MODULE + + private: // instance variables + + /** The OCMB chip associated with this data bundle. */ + ExtensibleChip * const iv_chip; + + #ifdef __HOSTBOOT_MODULE + + /** The Targeted Diagnostics controller. */ + MemTdCtlr<TARGETING::TYPE_OCMB_CHIP> * iv_tdCtlr = nullptr; + + /** IMPE threshold counter. 
*/ + VcmFalseAlarm * iv_impeThresholdCounter = nullptr; + + #endif // __HOSTBOOT_MODULE + + public: // instance variables + + MemCeTable<TARGETING::TYPE_OCMB_CHIP> iv_ceTable; ///< CE table for FFDC + MemUeTable iv_ueTable; ///< UE table for FFDC + + /** If there is a channel failure detected on this bus, there will be some + * required cleanup after analysis to mask off all further attentions from + * the bus. A channel failure could occur on either side of the bus and it + * is possible the cleanup function could be called in multiple + * PostAnalysis plugins depending on where the channel failure occurred. + * Since we only want to do one cleanup, we will use this variable to + * indicate if a cleanup is still required or has already been done. */ + bool iv_doChnlFailCleanup = false; + + #ifdef __HOSTBOOT_MODULE + + /** Threshold table for RCD parity errors. */ + TimeBasedThreshold iv_rcdParityTh = TimeBasedThreshold( getRcdParityTh() ); + + /** Threshold table for IUEs. Threshold per DIMM */ + std::map<uint8_t, TimeBasedThreshold> iv_iueTh; + + /** Bool to indicate if we've triggered a port fail because of IUEs. */ + bool iv_iuePortFail = false; + + #ifdef __HOSTBOOT_RUNTIME + + /** VCM false alarm counter. */ + VcmFalseAlarm * iv_vcmFalseAlarmCounter = nullptr; + + /** TPS false alarm counter. */ + TpsFalseAlarm * iv_tpsFalseAlarmCounter = nullptr; + + /** Set to true if mainline NCEs and TCEs should be permanently masked. This + * is checked at the end of targeted diagnostics before background + * scrubbing is resumed. */ + bool iv_maskMainlineNceTce = false; + + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. 
+ TimeBasedThreshold iv_ueStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + TimeBasedThreshold iv_ceStopCounter = + TimeBasedThreshold( 16, ThresholdResolution::TEN_HOURS ); + + // If we stop on a UE or a CE, we will need to store the rank that the + // error is on so that we can clear our respective thresholds if the + // next error we stop on is on a different rank. + MemRank iv_ceUeRank; + + #else // IPL only + + /** MNFG IPL CE statistics. */ + MemIplCeStats<TARGETING::TYPE_OCMB_CHIP> * iv_iplCeStats = nullptr; + + #endif + + #endif // __HOSTBOOT_MODULE + +}; + +/** + * @brief Wrapper function for the OcmbDataBundle. + * @param i_ocmbChip The OCMB chip. + * @return This OCMB's data bundle. + */ +inline OcmbDataBundle * getOcmbDataBundle( ExtensibleChip * i_ocmbChip ) +{ + return static_cast<OcmbDataBundle *>(i_ocmbChip->getDataBundle()); +} + +} // end namespace PRDF + +#endif // __prdfOcmbDataBundle_H + diff --git a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk index 087214ece..2ea0712d3 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk +++ b/src/usr/diag/prdf/common/plat/mem/prdf_plat_mem.mk @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2018 +# Contributors Listed Below - COPYRIGHT 2016,2019 # [+] International Business Machines Corp. # # @@ -51,6 +51,7 @@ prd_obj += prdfMemoryMru.o prd_obj += prdfMemUeTable.o prd_obj += prdfMemUtils.o prd_obj += prdfMemThresholds.o +prd_obj += prdfP9OcmbChipDomain.o # rule plugin related prd_rule_plugin += prdfP9Mca_common.o |