summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/diag/prdf/common/plat/mem/prdfMemMark.C')
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemMark.C279
1 files changed, 231 insertions, 48 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
index 83bff1876..e43d844c4 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
@@ -46,7 +46,7 @@ namespace MarkStore
{
//##############################################################################
-// Utilities to read/write markstore (MCA)
+// Utilities to read/write markstore
//##############################################################################
// - We have the ability to set chip marks via the FWMSx registers, but there
@@ -62,15 +62,19 @@ namespace MarkStore
// mark per master rank. This matches the P8 behavior. This could be improved
// upon later if we have the time, but doubtful.
// - Summary:
-// - Chip marks will use HWMS0-7 registers (0x07010AD0-0x07010AD7).
-// - Symbol marks will use FWMS0-7 registers (0x07010AD8-0x07010ADF).
+// - Chip marks will use HWMS0-7 registers:
+// Nimbus: (0x07010AD0-0x07010AD7)
+// Axone: (0x08011C10-0x08011C17)
+// - Symbol marks will use FWMS0-7 registers:
+// Nimbus: (0x07010AD8-0x07010ADF)
+// Axone: (0x08011C18-0x08011C1F)
// - Each register maps to master ranks 0-7.
-template<>
-uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank, MemMark & o_mark )
+template<TARGETING::TYPE T>
+uint32_t readChipMark( ExtensibleChip * i_chip, const MemRank & i_rank,
+ MemMark & o_mark )
{
- #define PRDF_FUNC "[readChipMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[readChipMark<T>] "
uint32_t o_rc = SUCCESS;
o_mark = MemMark(); // ensure invalid
@@ -110,14 +114,21 @@ uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template // Explicit instantiation of readChipMark for TYPE_MCA.
+uint32_t readChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank, MemMark & o_mark );
+template // Explicit instantiation of readChipMark for TYPE_OCMB_CHIP.
+uint32_t readChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ MemMark & o_mark );
+
//------------------------------------------------------------------------------
-template<>
-uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank,
- const MemMark & i_mark )
+template<TARGETING::TYPE T>
+uint32_t writeChipMark( ExtensibleChip * i_chip, const MemRank & i_rank,
+ const MemMark & i_mark )
{
- #define PRDF_FUNC "[writeChipMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[writeChipMark<T>] "
PRDF_ASSERT( i_mark.isValid() );
@@ -153,13 +164,21 @@ uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template // Explicit instantiation of writeChipMark for TYPE_MCA.
+uint32_t writeChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ const MemMark & i_mark );
+template // Explicit instantiation of writeChipMark for TYPE_OCMB_CHIP.
+uint32_t writeChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ const MemMark & i_mark );
+
//------------------------------------------------------------------------------
-template<>
-uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank )
+template<TARGETING::TYPE T>
+uint32_t clearChipMark( ExtensibleChip * i_chip, const MemRank & i_rank )
{
- #define PRDF_FUNC "[clearChipMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[clearChipMark<T>] "
uint32_t o_rc = SUCCESS;
@@ -185,13 +204,20 @@ uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template // Explicit instantiation of clearChipMark for TYPE_MCA.
+uint32_t clearChipMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank );
+template // Explicit instantiation of clearChipMark for TYPE_OCMB_CHIP.
+uint32_t clearChipMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank );
+
//------------------------------------------------------------------------------
-template<>
-uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank, MemMark & o_mark )
+template<TARGETING::TYPE T>
+uint32_t readSymbolMark( ExtensibleChip * i_chip,
+ const MemRank & i_rank, MemMark & o_mark )
{
- #define PRDF_FUNC "[readSymbolMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[readSymbolMark<T>] "
uint32_t o_rc = SUCCESS;
o_mark = MemMark(); // ensure invalid
@@ -247,14 +273,21 @@ uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template // Explicit instantiation of readSymbolMark for TYPE_MCA.
+uint32_t readSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank, MemMark & o_mark );
+template // Explicit instantiation of readSymbolMark for TYPE_OCMB_CHIP.
+uint32_t readSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ MemMark & o_mark );
+
//------------------------------------------------------------------------------
-template<>
-uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank,
- const MemMark & i_mark )
+template<TARGETING::TYPE T>
+uint32_t writeSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank,
+ const MemMark & i_mark )
{
- #define PRDF_FUNC "[writeSymbolMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[writeSymbolMark<T>] "
PRDF_ASSERT( i_mark.isValid() );
@@ -294,36 +327,47 @@ uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
msName, i_chip->getHuid() );
}
- // Nimbus symbol mark performance workaround
- // When a symbol mark is placed at runtime
- #ifdef __HOSTBOOT_RUNTIME
+ // Nimbus only symbol mark performance workaround
+ if ( T == TYPE_MCA )
+ {
+ // When a symbol mark is placed at runtime
+ #ifdef __HOSTBOOT_RUNTIME
- // Trigger WAT logic to 'disable bypass'
- // Get the ECC Debug/WAT Control register
- SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" );
+ // Trigger WAT logic to 'disable bypass'
+ // Get the ECC Debug/WAT Control register
+ SCAN_COMM_REGISTER_CLASS * dbgr = i_chip->getRegister( "DBGR" );
- // Set DBGR[8] = 0b1
- dbgr->SetBit( 8 );
- o_rc = dbgr->Write();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x",
- i_chip->getHuid() );
+ // Set DBGR[8] = 0b1
+ dbgr->SetBit( 8 );
+ o_rc = dbgr->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Write() failed on DBGR: mca=0x%08x",
+ i_chip->getHuid() );
+ }
+ #endif
}
- #endif
return o_rc;
#undef PRDF_FUNC
}
+template // Explicit instantiation of writeSymbolMark for TYPE_MCA.
+uint32_t writeSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ const MemMark & i_mark );
+template // Explicit instantiation of writeSymbolMark for TYPE_OCMB_CHIP.
+uint32_t writeSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ const MemMark & i_mark );
+
//------------------------------------------------------------------------------
-template<>
-uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank )
+template<TARGETING::TYPE T>
+uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank )
{
- #define PRDF_FUNC "[clearSymbolMark<TYPE_MCA>] "
+ #define PRDF_FUNC "[clearSymbolMark<T>] "
uint32_t o_rc = SUCCESS;
@@ -349,6 +393,13 @@ uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template // Explicit instantiation of clearSymbolMark for TYPE_MCA.
+uint32_t clearSymbolMark<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank );
+template // Explicit instantiation of clearSymbolMark for TYPE_OCMB_CHIP.
+uint32_t clearSymbolMark<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank );
+
//##############################################################################
// Utilities to read/write markstore (MBA)
//##############################################################################
@@ -958,7 +1009,7 @@ void __addCallout( ExtensibleChip * i_chip, const MemRank & i_rank,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-uint32_t __addRowRepairCallout( ExtensibleChip * i_chip,
+uint32_t __addRowRepairCallout( TargetHandle_t i_trgt,
const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc )
{
@@ -967,7 +1018,7 @@ uint32_t __addRowRepairCallout( ExtensibleChip * i_chip,
uint32_t o_rc = SUCCESS;
// Get the dimms on this rank on either port.
- TargetHandleList dimmList = getConnectedDimms( i_chip->getTrgt(), i_rank );
+ TargetHandleList dimmList = getConnectedDimms( i_trgt, i_rank );
// Check for row repairs on each dimm.
for ( auto const & dimm : dimmList )
@@ -1073,8 +1124,8 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip,
__addCallout( i_chip, i_rank, ecc, io_sc );
// Add the row repairs to the callout list if they exist
- o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>( i_chip, i_rank,
- io_sc );
+ o_rc = __addRowRepairCallout<TARGETING::TYPE_MBA>(
+ i_chip->getTrgt(), i_rank, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) "
@@ -1136,6 +1187,125 @@ uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+// Applies the DRAM repair RAS policies for a chip mark on an OCMB chip.
+//
+// When DRAM sparing is enabled for the rank, the spares currently steered in
+// hardware and any row repairs are handed to the callout helpers. Then either
+// a DsdEvent is created (a DRAM spare or the ECC spare is available) or the
+// repairs are reported as exhausted. When sparing is not enabled, the repairs
+// are reported as exhausted only if both the chip mark and the symbol mark are
+// valid.
+//
+// i_chip           The OCMB chip.
+// i_rank           Rank passed to the sparing, steering and callout helpers.
+// io_sc            Step code data; callouts and the signature are updated.
+// i_chipMark       Chip mark; its symbol supplies the port select and DRAM.
+// i_symMark        Symbol mark; only its validity is checked here.
+// o_dsdEvent       Set to a newly allocated DsdEvent when a spare is
+//                  available; otherwise not modified.
+// o_allRepairsUsed Set to true when no further repair is possible; otherwise
+//                  not modified.
+// Returns SUCCESS, or the non-SUCCESS code of the first helper that failed.
+template<>
+uint32_t __applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+                                             const MemRank & i_rank,
+                                             STEP_CODE_DATA_STRUCT & io_sc,
+                                             const MemMark & i_chipMark,
+                                             const MemMark & i_symMark,
+                                             TdEntry * & o_dsdEvent,
+                                             bool & o_allRepairsUsed )
+{
+    #define PRDF_FUNC "[__applyRasPolicies<TYPE_OCMB_CHIP>] "
+
+    uint32_t o_rc = SUCCESS;
+
+    do
+    {
+        const uint8_t ps   = i_chipMark.getSymbol().getPortSlct();
+        const uint8_t dram = i_chipMark.getSymbol().getDram();
+
+        TargetHandle_t memPort = getConnectedChild( i_chip->getTrgt(),
+                                                    TYPE_MEM_PORT, ps );
+
+        TargetHandle_t dimmTrgt = getConnectedDimm( memPort, i_rank, ps );
+
+        const bool isX4 = isDramWidthX4( dimmTrgt );
+
+        // Determine if DRAM sparing is enabled.
+        bool isEnabled = false;
+        o_rc = isDramSparingEnabled<TYPE_MEM_PORT>( memPort, i_rank, ps,
+                                                    isEnabled );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "isDramSparingEnabled() failed." );
+            break;
+        }
+
+        if ( isEnabled )
+        {
+            // Sparing is enabled. Get the current spares in hardware.
+            MemSymbol sp0, sp1, ecc;
+            o_rc = mssGetSteerMux<TARGETING::TYPE_OCMB_CHIP>(
+                                i_chip->getTrgt(), i_rank, sp0, sp1, ecc );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed",
+                          i_chip->getHuid(), i_rank.getKey() );
+                break;
+            }
+
+            // Add the spares to the callout list if they exist.
+            __addCallout( i_chip, i_rank, sp0, io_sc );
+            __addCallout( i_chip, i_rank, sp1, io_sc );
+            __addCallout( i_chip, i_rank, ecc, io_sc );
+
+            // Add the row repairs to the callout list if they exist
+            o_rc = __addRowRepairCallout<TARGETING::TYPE_OCMB_CHIP>( memPort,
+                                                                     i_rank,
+                                                                     io_sc );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "__addRowRepairCallout(0x%08x,0x%02x) "
+                          "failed.", i_chip->getHuid(), i_rank.getKey() );
+                break;
+            }
+
+            // If the chip mark is on a spare then the spare is bad and hardware
+            // can not steer it to another DRAM even if one is available (e.g.
+            // the ECC spare). In this case, make error log predictive.
+            if ( ( (0 == ps) && sp0.isValid() && (dram == sp0.getDram()) ) ||
+                 ( (1 == ps) && sp1.isValid() && (dram == sp1.getDram()) ) ||
+                 ( isX4 && ecc.isValid() && (dram == ecc.getDram()) ) )
+            {
+                o_allRepairsUsed = true;
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                                                  PRDFSIG_VcmBadSpare );
+                break; // Nothing more to do.
+            }
+
+            // Certain DIMMs may have had spares intentionally made unavailable
+            // by the manufacturer. Check the VPD for available spares.
+            // Both flags start out false so they are never read while
+            // indeterminate, and a failed lookup stops here instead of
+            // choosing a repair action from unset data.
+            bool spAvail  = false;
+            bool eccAvail = false;
+            o_rc = isSpareAvailable<TYPE_MEM_PORT>( memPort, i_rank,
+                                                    ps, spAvail, eccAvail );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "isSpareAvailable() failed." );
+                break;
+            }
+
+            if ( spAvail )
+            {
+                // A spare DRAM is available.
+                o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank,
+                                                           i_chipMark };
+            }
+            else if ( eccAvail )
+            {
+                // The ECC spare is available.
+                o_dsdEvent = new DsdEvent<TYPE_OCMB_CHIP>{ i_chip, i_rank,
+                                                           i_chipMark, true };
+            }
+            else
+            {
+                // Chip mark is in place and sparing is not possible.
+                o_allRepairsUsed = true;
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                                                  PRDFSIG_AllDramRepairs );
+            }
+        }
+        // There is no DRAM sparing so simply check if both the chip and symbol
+        // mark have been used.
+        else if ( i_chipMark.isValid() && i_symMark.isValid() )
+        {
+            o_allRepairsUsed = true;
+            io_sc.service_data->setSignature( i_chip->getHuid(),
+                                              PRDFSIG_AllDramRepairs );
+        }
+
+    } while (0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
@@ -1220,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank,
{
io_sc.service_data->setServiceCall();
+ // We want to try to avoid garding NVDIMMs, so clear gard for them now.
+ io_sc.service_data->clearNvdimmMruListGard();
+
#ifdef __HOSTBOOT_RUNTIME
// No more repairs left so no point doing any more TPS procedures.
MemDbUtils::banTps<T>( i_chip, i_rank );
@@ -1241,6 +1414,11 @@ uint32_t applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc,
TdEntry * & o_dsdEvent );
+template // Explicit instantiation of applyRasPolicies for TYPE_OCMB_CHIP.
+uint32_t applyRasPolicies<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ TdEntry * & o_dsdEvent );
//------------------------------------------------------------------------------
@@ -1290,7 +1468,8 @@ uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank,
// Set the chip mark in the DRAM Repairs VPD.
if ( !areDramRepairsDisabled() )
{
- o_rc = setDramInVpd( i_chip, i_rank, chipMark.getSymbol() );
+ o_rc = setDramInVpd( i_chip->getTrgt(), i_rank,
+ chipMark.getSymbol() );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed",
@@ -1314,6 +1493,10 @@ template
uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc );
+template // Explicit instantiation of chipMarkCleanup for TYPE_OCMB_CHIP.
+uint32_t chipMarkCleanup<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
#endif // not supported on FSP
OpenPOWER on IntegriCloud