diff options
Diffstat (limited to 'src/usr/diag/prdf/common/plat/mem/prdfMemMark.C')
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemMark.C | 279 |
1 files changed, 231 insertions, 48 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C index 83bff1876..e43d844c4 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C @@ -46,7 +46,7 @@ namespace MarkStore { //############################################################################## -// Utilities to read/write markstore (MCA) +// Utilities to read/write markstore //############################################################################## // - We have the ability to set chip marks via the FWMSx registers, but there @@ -62,15 +62,19 @@ namespace MarkStore // mark per master rank. This matches the P8 behavior. This could be improved // upon later if we have the time, but doubtful. // - Summary: -// - Chip marks will use HWMS0-7 registers (0x07010AD0-0x07010AD7). -// - Symbol marks will use FWMS0-7 registers (0x07010AD8-0x07010ADF). +// - Chip marks will use HWMS0-7 registers: +// Nimbus: (0x07010AD0-0x07010AD7) +// Axone: (0x08011C10-0x08011C17) +// - Symbol marks will use FWMS0-7 registers: +// Nimbus: (0x07010AD8-0x07010ADF) +// Axone: (0x08011C18-0x08011C1F) // - Each register maps to master ranks 0-7. 
-template<> -uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + MemMark & o_mark ) { - #define PRDF_FUNC "[readChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[readChipMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -110,14 +114,21 @@ uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeChipMark<T>] " PRDF_ASSERT( i_mark.isValid() ); @@ -153,13 +164,21 @@ uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearChipMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearChipMark<T>] " uint32_t o_rc = SUCCESS; @@ -185,13 +204,20 @@ uint32_t 
clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template +uint32_t clearChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //------------------------------------------------------------------------------ -template<> -uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, MemMark & o_mark ) +template<TARGETING::TYPE T> +uint32_t readSymbolMark( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ) { - #define PRDF_FUNC "[readSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[readSymbolMark<T>] " uint32_t o_rc = SUCCESS; o_mark = MemMark(); // ensure invalid @@ -247,14 +273,21 @@ uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, MemMark & o_mark ); +template +uint32_t readSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemMark & o_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - const MemMark & i_mark ) +template<TARGETING::TYPE T> +uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank, + const MemMark & i_mark ) { - #define PRDF_FUNC "[writeSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[writeSymbolMark<T>] " PRDF_ASSERT( i_mark.isValid() ); @@ -294,36 +327,47 @@ uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, msName, i_chip->getHuid() ); } - // Nimbus symbol mark performance workaround - // When a symbol mark is placed at runtime - #ifdef __HOSTBOOT_RUNTIME + // Nimbus only symbol mark performance workaround + if ( T == TYPE_MCA ) + { + // When a symbol mark is placed at runtime + #ifdef __HOSTBOOT_RUNTIME - // Trigger WAT logic to 'disable bypass' 
- // Get the ECC Debug/WAT Control register - SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); + // Trigger WAT logic to 'disable bypass' + // Get the ECC Debug/WAT Control register + SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" ); - // Set DBGR[8] = 0b1 - dbgr->SetBit( 8 ); - o_rc = dbgr->Write(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", - i_chip->getHuid() ); + // Set DBGR[8] = 0b1 + dbgr->SetBit( 8 ); + o_rc = dbgr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x", + i_chip->getHuid() ); + } + #endif } - #endif return o_rc; #undef PRDF_FUNC } +template +uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); +template +uint32_t writeSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + const MemMark & i_mark ); + //------------------------------------------------------------------------------ -template<> -uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank ) +template<TARGETING::TYPE T> +uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank ) { - #define PRDF_FUNC "[clearSymbolMark<TYPE_MCA>] " + #define PRDF_FUNC "[clearSymbolMark<T>] " uint32_t o_rc = SUCCESS; @@ -349,6 +393,13 @@ uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template +uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank ); +template +uint32_t clearSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank ); + //############################################################################## // Utilities to read/write markstore (MBA) //############################################################################## @@ -958,7 +1009,7 @@ void __addCallout( ExtensibleChip * i_chip, const MemRank & i_rank, 
//------------------------------------------------------------------------------ template<TARGETING::TYPE T> -uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, +uint32_t __addRowRepairCallout( TargetHandle_t i_trgt, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ) { @@ -967,7 +1018,7 @@ uint32_t __addRowRepairCallout( ExtensibleChip * i_chip, uint32_t o_rc = SUCCESS; // Get the dimms on this rank on either port. - TargetHandleList dimmList = getConnectedDimms( i_chip->getTrgt(), i_rank ); + TargetHandleList dimmList = getConnectedDimms( i_trgt, i_rank ); // Check for row repairs on each dimm. for ( auto const & dimm : dimmList ) @@ -1073,8 +1124,8 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, __addCallout( i_chip, i_rank, ecc, io_sc ); // Add the row repairs to the callout list if they exist - o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( i_chip, i_rank, - io_sc ); + o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( + i_chip->getTrgt(), i_rank, io_sc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " @@ -1136,6 +1187,125 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t __applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + const MemMark & i_chipMark, + const MemMark & i_symMark, + TdEntry * & o_dsdEvent, + bool & o_allRepairsUsed ) +{ + #define PRDF_FUNC "[__applyRasPolicies<TYPE_OCMB_CHIP>] " + + uint32_t o_rc = SUCCESS; + + do + { + const uint8_t ps = i_chipMark.getSymbol().getPortSlct(); + const uint8_t dram = i_chipMark.getSymbol().getDram(); + + TargetHandle_t memPort = getConnectedChild( i_chip->getTrgt(), + TYPE_MEM_PORT, ps ); + + TargetHandle_t dimmTrgt = getConnectedDimm( memPort, i_rank, ps ); + + const bool isX4 = isDramWidthX4( dimmTrgt ); + + // Determine if DRAM sparing is enabled. 
+ bool isEnabled = false; + o_rc = isDramSparingEnabled<TYPE_MEM_PORT>( memPort, i_rank, ps, + isEnabled ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "isDramSparingEnabled() failed." ); + break; + } + + if ( isEnabled ) + { + // Sparing is enabled. Get the current spares in hardware. + MemSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux<TARGETING::TYPE_OCMB_CHIP>( i_chip->getTrgt(), + i_rank, sp0, sp1, + ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed", + i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // Add the spares to the callout list if they exist. + __addCallout( i_chip, i_rank, sp0, io_sc ); + __addCallout( i_chip, i_rank, sp1, io_sc ); + __addCallout( i_chip, i_rank, ecc, io_sc ); + + // Add the row repairs to the callout list if they exist + o_rc = __addRowRepairCallout<TARGETING::TYPE_OCMB_CHIP>( memPort, + i_rank, + io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) " + "failed.", i_chip->getHuid(), i_rank.getKey() ); + break; + } + + // If the chip mark is on a spare then the spare is bad and hardware + // can not steer it to another DRAM even if one is available (e.g. + // the ECC spare). In this case, make error log predictive. + if ( ( (0 == ps) && sp0.isValid() && (dram == sp0.getDram()) ) || + ( (1 == ps) && sp1.isValid() && (dram == sp1.getDram()) ) || + ( isX4 && ecc.isValid() && (dram == ecc.getDram()) ) ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_VcmBadSpare ); + break; // Nothing more to do. + } + + // Certain DIMMs may have had spares intentionally made unavailable by + // the manufacturer. Check the VPD for available spares. + bool spAvail, eccAvail; + o_rc = isSpareAvailable<TYPE_MEM_PORT>( memPort, i_rank, + ps, spAvail, eccAvail ); + if ( spAvail ) + { + // A spare DRAM is available. 
+ o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark }; + } + else if ( eccAvail ) + { + // The ECC spare is available. + o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank, + i_chipMark, true }; + } + else + { + // Chip mark is in place and sparing is not possible. + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + } + // There is no DRAM sparing so simply check if both the chip and symbol + // mark have been used. + else if ( i_chipMark.isValid() && i_symMark.isValid() ) + { + o_allRepairsUsed = true; + io_sc.service_data->setSignature( i_chip->getHuid(), + PRDFSIG_AllDramRepairs ); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + //------------------------------------------------------------------------------ template<TARGETING::TYPE T> @@ -1220,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank, { io_sc.service_data->setServiceCall(); + // We want to try to avoid garding NVDIMMs, so clear gard for them now. + io_sc.service_data->clearNvdimmMruListGard(); + #ifdef __HOSTBOOT_RUNTIME // No more repairs left so no point doing any more TPS procedures. MemDbUtils::banTps<T>( i_chip, i_rank ); @@ -1241,6 +1414,11 @@ uint32_t applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc, TdEntry * & o_dsdEvent ); +template +uint32_t applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc, + TdEntry * & o_dsdEvent ); //------------------------------------------------------------------------------ @@ -1290,7 +1468,8 @@ uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank, // Set the chip mark in the DRAM Repairs VPD. 
if ( !areDramRepairsDisabled() ) { - o_rc = setDramInVpd( i_chip, i_rank, chipMark.getSymbol() ); + o_rc = setDramInVpd( i_chip->getTrgt(), i_rank, + chipMark.getSymbol() ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed", @@ -1314,6 +1493,10 @@ template uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t chipMarkCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); #endif // not supported on FSP |