summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-05-20 15:30:09 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2018-05-23 15:11:41 -0400
commitb8037fcdbe638eff35b075b571f4c709689a2b21 (patch)
tree36b8e517864fa0938a9bb9fb7ffb3c84f1f733c0
parentc7867f1449a1434338c2513c90b4a40438fa94d2 (diff)
downloadtalos-hostboot-b8037fcdbe638eff35b075b571f4c709689a2b21.tar.gz
talos-hostboot-b8037fcdbe638eff35b075b571f4c709689a2b21.zip
PRD: cleanup after placing/verifying a chip mark on Centaur
Change-Id: I0688d00875ce97595a18b3338aff3f8f59e19ff3 RTC: 193261 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59117 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59233 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C17
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemMark.C287
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemMark.H67
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C81
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemDsd.H9
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C24
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemVcm.H34
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C40
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C232
9 files changed, 455 insertions, 336 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 467441c5a..bf0508d70 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -1111,25 +1111,14 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip,
break;
}
- o_rc = MarkStore::balance<TYPE_MCA>( i_chip, rank, io_sc );
+ o_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( i_chip, rank,
+ io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "balance(0x%08x,0x%02x) failed",
+ PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed",
i_chip->getHuid(), rank.getKey() );
break;
}
-
- // Set the dram in DRAM Repairs VPD.
- o_rc = setDramInVpd<TYPE_MCA>( i_chip, rank, symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed",
- i_chip->getHuid(), rank.getKey() );
- break;
- }
-
- // Add a DRAM sparing procedure to the queue, if supported.
- // TODO: RTC 157888
}
}
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
index 84e7c09b5..35f7803e7 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
@@ -30,6 +30,8 @@
#include <prdfMemDbUtils.H>
#ifdef __HOSTBOOT_MODULE
+#include <prdfCenMbaExtraSig.H>
+#include <prdfMemDsd.H>
#include <prdfMemVcm.H>
#endif
@@ -872,7 +874,290 @@ uint32_t writeSymbolMark<TYPE_MBA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
-//------------------------------------------------------------------------------
+//##############################################################################
+// Utilities to cleanup markstore after a chip mark is verified
+//##############################################################################
+
+#ifdef __HOSTBOOT_MODULE // Not supported on FSP.
+
+/**
+ * @brief  Primary template declaration for the target-specific RAS policy
+ *         helper called by chipMarkCleanup() after the chip mark has been
+ *         written to the DRAM Repairs VPD. Only the explicit specializations
+ *         that follow (TYPE_MCA and TYPE_MBA) are defined.
+ * @param  i_chip     Chip passed through from chipMarkCleanup().
+ * @param  i_rank     Target rank.
+ * @param  io_sc      The step code data struct.
+ * @param  i_chipMark The chip mark read from the rank.
+ * @param  i_symMark  The symbol mark read from the rank (may be invalid).
+ * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t __applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ const MemMark & i_chipMark,
+ const MemMark & i_symMark );
+
+/**
+ * @brief  Nimbus (MCA) RAS policies applied after a chip mark is in place.
+ *
+ * Nimbus has no DRAM sparing, so the only check is whether both the chip mark
+ * and the symbol mark are in use. If they are, the error log is made
+ * predictive with the PRDFSIG_AllDramRepairs signature and, at runtime, TPS
+ * is banned on the rank.
+ *
+ * @param  i_chip     MCA chip.
+ * @param  i_rank     Target rank.
+ * @param  io_sc      The step code data struct.
+ * @param  i_chipMark The chip mark on this rank.
+ * @param  i_symMark  The symbol mark on this rank (may be invalid).
+ * @return Always SUCCESS.
+ */
+template<>
+uint32_t __applyRasPolicies<TYPE_MCA>( ExtensibleChip * i_chip,
+                                       const MemRank & i_rank,
+                                       STEP_CODE_DATA_STRUCT & io_sc,
+                                       const MemMark & i_chipMark,
+                                       const MemMark & i_symMark )
+{
+    // At least one repair is still unused, so no policy applies.
+    if ( !i_chipMark.isValid() || !i_symMark.isValid() )
+    {
+        return SUCCESS;
+    }
+
+    // Both marks are consumed: all DRAM repairs on this rank are used.
+    io_sc.service_data->setServiceCall();
+    io_sc.service_data->setSignature( i_chip->getHuid(),
+                                      PRDFSIG_AllDramRepairs );
+
+    #ifdef __HOSTBOOT_RUNTIME
+    // With no repairs left, further TPS procedures would be pointless.
+    MemDbUtils::banTps<TYPE_MCA>( i_chip, i_rank );
+    #endif
+
+    return SUCCESS;
+}
+
+/**
+ * @brief  Centaur (MBA) RAS policies applied after a chip mark is in place.
+ *
+ * Function details:
+ *  - If DRAM sparing is enabled (currently only x4 mode, which always has an
+ *    ECC spare):
+ *     - Adds any valid spares to the callout list.
+ *     - If the chip mark is on a spare, all repairs are considered used and
+ *       the signature is set to PRDFSIG_VcmBadSpare.
+ *     - Otherwise, queues a DSD procedure if a DRAM spare or the ECC spare is
+ *       available; if not, all repairs are considered used and the signature
+ *       is set to PRDFSIG_AllDramRepairs.
+ *  - If DRAM sparing is not enabled, all repairs are considered used when
+ *    both the chip mark and symbol mark are valid.
+ *  - When all repairs are used, the error log is made predictive and, at
+ *    runtime, TPS is banned on the rank.
+ *
+ * @note   The spare configuration, steer mux, and spare availability queries
+ *         are still stubbed out (TODO RTC 189221), so the spare symbols are
+ *         left default-constructed and both availability flags stay false.
+ *
+ * @param  i_chip     MBA chip.
+ * @param  i_rank     Target rank.
+ * @param  io_sc      The step code data struct.
+ * @param  i_chipMark The chip mark on this rank.
+ * @param  i_symMark  The symbol mark on this rank (may be invalid).
+ * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise.
+ */
+template<>
+uint32_t __applyRasPolicies<TYPE_MBA>( ExtensibleChip * i_chip,
+                                       const MemRank & i_rank,
+                                       STEP_CODE_DATA_STRUCT & io_sc,
+                                       const MemMark & i_chipMark,
+                                       const MemMark & i_symMark )
+{
+    #define PRDF_FUNC "[__applyRasPolicies<TYPE_MBA>] "
+
+    uint32_t o_rc = SUCCESS;
+
+    bool allRepairsUsed = false;
+
+    do
+    {
+        const uint8_t ps   = i_chipMark.getSymbol().getPortSlct();
+        const uint8_t dram = i_chipMark.getSymbol().getDram();
+
+        const bool isX4 = isDramWidthX4( i_chip->getTrgt() );
+
+        // Determine if DRAM sparing is enabled.
+        bool isEnabled = isX4; // Always an ECC spare in x4 mode.
+
+        if ( !isEnabled )
+        {
+            /* TODO RTC 189221
+            // Check for any DRAM spares.
+            uint8_t cnfg = ENUM_ATTR_VPD_DIMM_SPARE_NO_SPARE;
+            o_rc = getDimmSpareConfig<TYPE_MBA>( i_chip, i_rank, ps, cnfg );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "getDimmSpareConfig(0x%08x,0x%02x,%d) "
+                          "failed", i_chip->getHuid(), i_rank.getKey(), ps );
+                break;
+            }
+            isEnabled = (ENUM_ATTR_VPD_DIMM_SPARE_NO_SPARE != cnfg);
+            */
+        }
+
+        if ( isEnabled )
+        {
+            // Sparing is enabled. Get the current spares in hardware.
+            MemSymbol sp0, sp1, ecc;
+            /* TODO RTC 189221
+            o_rc = mssGetSteerMux<TYPE_MBA>( i_chip, i_rank, sp0, sp1, ecc );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "mssGetSteerMux(0x%08x,0x%02x) failed",
+                          i_chip->getHuid(), i_rank.getKey() );
+                break;
+            }
+            */
+
+            // Add the spares to the callout list if they exist.
+            if ( sp0.isValid() )
+            {
+                MemoryMru mm { i_chip->getTrgt(), i_rank, sp0 };
+                io_sc.service_data->SetCallout( mm );
+            }
+            if ( sp1.isValid() )
+            {
+                MemoryMru mm { i_chip->getTrgt(), i_rank, sp1 };
+                io_sc.service_data->SetCallout( mm );
+            }
+            if ( ecc.isValid() )
+            {
+                MemoryMru mm { i_chip->getTrgt(), i_rank, ecc };
+                io_sc.service_data->SetCallout( mm );
+            }
+
+            // The DRAM spare that belongs to the chip mark's port select.
+            const MemSymbol & portSpare = ( 0 == ps ) ? sp0 : sp1;
+
+            // If the chip mark is on a spare then the spare is bad and hardware
+            // can not steer it to another DRAM even if one is available (e.g.
+            // the ECC spare). In this case, make the error log predictive.
+            // Only spares that are actually in use are compared; the DRAM
+            // position of an unset spare is meaningless and must not be
+            // mistaken for a match.
+            if ( ( portSpare.isValid() && (dram == portSpare.getDram()) ) ||
+                 ( ecc.isValid()       && (dram == ecc.getDram())       ) )
+            {
+                allRepairsUsed = true;
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                                                  PRDFSIG_VcmBadSpare );
+                break; // Nothing more to do.
+            }
+
+            // Certain DIMMs may have had spares intentionally made unavailable
+            // by the manufacturer. Check the VPD for available spares.
+            bool dramSparePossible = false;
+            bool eccSparePossible  = false;
+            /* TODO RTC 189221
+            o_rc = bitmap.isSpareAvailable( ps, dramSparePossible,
+                                            eccSparePossible );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "isDramSpareAvailable() failed" );
+                break;
+            }
+            */
+
+            if ( dramSparePossible && !portSpare.isValid() )
+            {
+                // A spare DRAM is available. The queue is handed the newly
+                // allocated entry.
+                TdEntry * e = new DsdEvent<TYPE_MBA>{ i_chip, i_rank,
+                                                      i_chipMark };
+                MemDbUtils::pushToQueue<TYPE_MBA>( i_chip, e );
+            }
+            else if ( eccSparePossible && !ecc.isValid() )
+            {
+                // The ECC spare is available.
+                TdEntry * e = new DsdEvent<TYPE_MBA>{ i_chip, i_rank,
+                                                      i_chipMark, true };
+                MemDbUtils::pushToQueue<TYPE_MBA>( i_chip, e );
+            }
+            else
+            {
+                // Chip mark is in place and sparing is not possible.
+                allRepairsUsed = true;
+                io_sc.service_data->setSignature( i_chip->getHuid(),
+                                                  PRDFSIG_AllDramRepairs );
+            }
+        }
+        // There is no DRAM sparing so simply check if both the chip and symbol
+        // mark have been used.
+        else if ( i_chipMark.isValid() && i_symMark.isValid() )
+        {
+            allRepairsUsed = true;
+            io_sc.service_data->setSignature( i_chip->getHuid(),
+                                              PRDFSIG_AllDramRepairs );
+        }
+
+    } while (0);
+
+    if ( allRepairsUsed )
+    {
+        io_sc.service_data->setServiceCall();
+
+        #ifdef __HOSTBOOT_RUNTIME
+        // No more repairs left so no point doing any more TPS procedures.
+        // i_chip is an MBA here, so the MBA flavor of banTps must be used.
+        MemDbUtils::banTps<TYPE_MBA>( i_chip, i_rank );
+        #endif
+    }
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+/**
+ * @brief  Does all necessary cleanup after a chip mark has been verified or
+ *         explicitly set on a rank.
+ *
+ * Function details:
+ *  - Does nothing if the rank has no chip mark.
+ *  - Adds the chip mark to the callout list.
+ *  - Clears the symbol mark if it is on the same DRAM as the chip mark;
+ *    otherwise adds the symbol mark (if any) to the callout list.
+ *  - If DRAM repairs are disabled, makes the error log predictive and stops.
+ *  - Otherwise, sets the chip mark's DRAM in the DRAM Repairs VPD and applies
+ *    the target-specific RAS policies.
+ *
+ * @param  i_chip MBA or MCA chip.
+ * @param  i_rank Target rank.
+ * @param  io_sc  The step code data struct.
+ * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank,
+                          STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[chipMarkCleanup] "
+
+    uint32_t o_rc = SUCCESS;
+
+    do
+    {
+        // Get the chip mark.
+        MemMark chipMark;
+        o_rc = readChipMark<T>( i_chip, i_rank, chipMark );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "readChipMark(0x%08x,0x%02x) failed",
+                      i_chip->getHuid(), i_rank.getKey() );
+            break;
+        }
+
+        // There is nothing else to do if there is no chip mark.
+        if ( !chipMark.isValid() ) break;
+
+        // Add the chip mark to the callout list.
+        MemoryMru cm_mm { i_chip->getTrgt(), i_rank, chipMark.getSymbol() };
+        io_sc.service_data->SetCallout( cm_mm );
+
+        // Get the symbol mark.
+        MemMark symMark;
+        o_rc = readSymbolMark<T>( i_chip, i_rank, symMark );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "readSymbolMark(0x%08x,0x%02x) failed",
+                      i_chip->getHuid(), i_rank.getKey() );
+            break;
+        }
+
+        // If both the chip and symbol mark are on the same DRAM, clear the
+        // symbol mark. The DRAM position is only meaningful for a symbol mark
+        // that actually exists, so skip the comparison (and the hardware
+        // write) when there is none.
+        if ( symMark.isValid() &&
+             (chipMark.getSymbol().getDram() == symMark.getSymbol().getDram()) )
+        {
+            o_rc = clearSymbolMark<T>( i_chip, i_rank );
+            if ( SUCCESS != o_rc )
+            {
+                PRDF_ERR( PRDF_FUNC "clearSymbolMark(0x%08x,0x%02x) failed",
+                          i_chip->getHuid(), i_rank.getKey() );
+                break;
+            }
+
+            // Reset the symbol mark variable to invalid.
+            symMark = MemMark();
+        }
+
+        // Add the symbol mark to the callout list if it exists.
+        if ( symMark.isValid() )
+        {
+            MemoryMru sm_mm { i_chip->getTrgt(), i_rank, symMark.getSymbol() };
+            io_sc.service_data->SetCallout( sm_mm );
+        }
+
+        // Make the error log predictive and exit if DRAM repairs are disabled.
+        if ( areDramRepairsDisabled() )
+        {
+            io_sc.service_data->setServiceCall();
+            break; // nothing else to do
+        }
+
+        // Set the chip mark in the DRAM Repairs VPD. This must follow the
+        // template's target type; this function is instantiated for both
+        // MCA and MBA.
+        o_rc = setDramInVpd<T>( i_chip, i_rank, chipMark.getSymbol() );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed",
+                      i_chip->getHuid(), i_rank.getKey() );
+            break;
+        }
+
+        // Apply RAS policies.
+        o_rc = __applyRasPolicies<T>( i_chip, i_rank, io_sc, chipMark,
+                                      symMark );
+        if ( SUCCESS != o_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "__applyRasPolicies(0x%08x,0x%02x) failed",
+                      i_chip->getHuid(), i_rank.getKey() );
+            break;
+        }
+
+    } while (0);
+
+    return o_rc;
+
+    #undef PRDF_FUNC
+}
+
+// Explicit instantiations of chipMarkCleanup() for the two supported target
+// types. The template body lives in this source file, so these make the MCA
+// and MBA versions available to callers in other files.
+template
+uint32_t chipMarkCleanup<TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t chipMarkCleanup<TYPE_MBA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
+#endif // not supported on FSP
} // end namespace MarkStore
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H
index 8ea692ea9..fca039258 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.H
@@ -174,62 +174,29 @@ template<TARGETING::TYPE T>
uint32_t clearSymbolMark( ExtensibleChip * i_chip, const MemRank & i_rank );
/**
- * @brief If a rank contains a symbol mark that is on the same DRAM as the chip
- * mark, the symbol mark is removed. This is done to free up available
- * repairs. Will also apply RAS policies where necessary.
+ * @brief If a chip mark has been verified or explicitly set due to other RAS
+ * policies, this function does all the necessary cleanup.
+ *
+ * Function details:
+ * - Adds the chip mark to the callout list.
+ * - Removes the symbol mark if it is on the same DRAM as the chip mark.
+ * - Adds the symbol mark to the callout list if it exists on another DRAM.
+ * - If DRAM repairs are disabled:
+ * - Makes the error log predictive.
+ * - Otherwise:
+ * - Sets the DRAM in the DRAM Repair VPD.
+ * - Makes the error log predictive if RAS policies apply.
+ * - Adds a DSD procedure to the TD queue if a DRAM spare is available.
+ * - Bans TPS on the rank if all repairs are used.
+ *
* @param i_chip MBA or MCA chip.
* @param i_rank Target rank.
* @param io_sc The step code data struct.
* @return Non-SUCCESS if an internal function fails. SUCCESS otherwise.
*/
template<TARGETING::TYPE T>
-uint32_t balance( ExtensibleChip * i_chip, const MemRank & i_rank,
- STEP_CODE_DATA_STRUCT & io_sc )
-{
- uint32_t o_rc = SUCCESS;
-
- do
- {
- // Get the chip mark.
- MemMark chipMark;
- o_rc = readChipMark<T>( i_chip, i_rank, chipMark );
- if ( SUCCESS != o_rc ) break;
- if ( !chipMark.isValid() ) break; // nothing to do.
-
- // Get the symbol mark.
- MemMark symMark;
- o_rc = readSymbolMark<T>( i_chip, i_rank, symMark );
- if ( SUCCESS != o_rc ) break;
- if ( !symMark.isValid() ) break; // nothing to do.
-
- // If both the chip and symbol mark are on the same DRAM, clear the
- // symbol mark.
- if ( chipMark.getSymbol().getDram() == symMark.getSymbol().getDram() )
- {
- o_rc = clearSymbolMark<T>( i_chip, i_rank );
- if ( SUCCESS != o_rc ) break;
- }
- else
- {
- // Both a chip and symbol mark exist, but they are on separate
- // DRAMs. So, make the error log predictive.
- io_sc.service_data->setServiceCall();
- io_sc.service_data->setSignature( i_chip->getHuid(),
- PRDFSIG_AllDramRepairs );
-
- // The chip and symbol mark may be on different DIMMs (Centaur ranks
- // span two DIMMs). Therefore, we must add both to the callout list
- // to ensure all DIMMs are in the callout list.
- MemoryMru cm_mm { i_chip->getTrgt(), i_rank, chipMark.getSymbol() };
- MemoryMru sm_mm { i_chip->getTrgt(), i_rank, symMark.getSymbol() };
- io_sc.service_data->SetCallout( cm_mm );
- io_sc.service_data->SetCallout( sm_mm );
- }
-
- } while (0);
-
- return o_rc;
-}
+uint32_t chipMarkCleanup( ExtensibleChip * i_chip, const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
} // end namespace MarkStore
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
index 88fd4dc00..f3ee6884f 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
@@ -596,87 +596,6 @@ int32_t CenMbaTdCtlr::startTpsPhase1( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
-int32_t CenMbaTdCtlr::addTdQueueEntryTPS( const CenRank & i_rank,
- STEP_CODE_DATA_STRUCT & io_sc,
- bool i_banTps )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::addTdQueueEntryTPS] "
-
- int32_t o_rc = SUCCESS;
-
- do
- {
- if ( iv_tpsRankData.isBanned(i_rank, io_sc) )
- {
- // TPS is banned, do not add the request to the queue.
- break;
- }
-
- // Check for any available repairs. There is no point doing TPS if we
- // cannot apply a repair.
- CenMark mark;
- o_rc = mssGetMarkStore( iv_mbaTrgt, i_rank, mark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "mssGetMarkStore() failed." );
- break;
- }
- if ( mark.getCM().isValid() &&
- (iv_x4Dimm || (!iv_x4Dimm && mark.getSM().isValid())) )
- {
- bool port0Available, port1Available;
- o_rc = checkForAvailableSpares( 0, port0Available );
- o_rc |= checkForAvailableSpares( 1, port1Available );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "checkForAvailableSpares() failed." );
- break;
- }
-
- if ( !port0Available && !port1Available )
- {
- // Ban TPS to avoid rechecking with subsequent TPS requests.
- iv_tpsRankData.ban( iv_rank );
-
- // TPS is banned, do not add the request to the queue.
- break;
- }
- }
-
- if ( i_banTps )
- {
- // Ban all future TPS requests for this rank (not including
- // this one).
- iv_tpsRankData.ban( i_rank );
- }
-
- // Push the TD request to the queue.
- iv_queue.push( TdQueueEntry(TPS_EVENT, i_rank) );
-
- // Mark this rank as bad.
- o_rc = iv_masterRanks.setBad( i_rank );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setBad() failed" );
- break;
- }
-
- } while(0);
-
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Failed: i_rank=m%ds%d i_banTps=%c",
- i_rank.getMaster(), i_rank.getSlave(),
- i_banTps ? 'T' : 'F' );
- }
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
int32_t CenMbaTdCtlr::handleUe_Td( STEP_CODE_DATA_STRUCT & io_sc,
const CenAddr & i_stopAddr,
bool i_addTpsRequest )
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H
index de1816927..73f133832 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemDsd.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemDsd.H
@@ -51,8 +51,9 @@ class DsdEvent : public TdEntry
* @param i_rank Rank reporting chip mark.
*/
DsdEvent<T>( ExtensibleChip * i_chip, const MemRank & i_rank,
- const MemMark & i_mark ) :
- TdEntry(DSD_EVENT, i_chip, i_rank), iv_mark(i_mark)
+ const MemMark & i_mark, bool i_eccSpare = false ) :
+ TdEntry(DSD_EVENT, i_chip, i_rank), iv_mark(i_mark),
+ iv_eccSpare(i_eccSpare)
{
PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( T == i_chip->getType() );
@@ -166,7 +167,9 @@ class DsdEvent : public TdEntry
private: // instance variables
- const MemMark iv_mark; ///< The chip mark from hardware.
+ const MemMark iv_mark; ///< The chip mark from hardware.
+ const bool iv_eccSpare; ///< True if the spare should be applied to the x4
+ ///< DRAM ECC spare.
};
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
index 57443c036..97cfeb557 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
@@ -947,16 +947,6 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
}
}
- // We may have placed a chip mark, so if a symbol mark is being used on
- // the same chip, undo the symbol mark after the chip mark is in place.
- o_rc = MarkStore::balance<TYPE_MCA>( iv_chip, iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "MarkStore::balance(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
-
// Write any updates to VPD.
o_rc = setBadDqBitmap<DIMMS_PER_RANK::MCA>(mcaTrgt, iv_rank, dqBitmap);
if ( SUCCESS != o_rc )
@@ -966,7 +956,19 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
iv_rank.getKey());
break;
}
- }while(0);
+
+ // We may have placed a chip mark so do any necessary cleanup. This must
+ // be called after writing the bad DQ bitmap because this function
+ // will also write it if necessary.
+ o_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( iv_chip, iv_rank, io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "MarkStore::chipMarkCleanup(0x%08x,0x%02x) "
+ "failed", iv_chip->getHuid(), getKey() );
+ break;
+ }
+
+ } while (0);
return o_rc;
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
index ccfa4475c..0de5f2fcf 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
@@ -277,19 +277,11 @@ class VcmEvent : public TdEntry
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_VcmVerified );
- if ( PlatServices::areDramRepairsDisabled() )
+ // Leave the chip mark in place and do any necessary cleanup.
+ o_rc = cleanup( io_sc );
+ if ( SUCCESS != o_rc )
{
- // Make the error log predictive, nothing else to do.
- io_sc.service_data->setServiceCall();
- }
- else
- {
- // Leave the chip mark in place and do any necessary cleanup.
- o_rc = cleanup( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "cleanup() failed" );
- }
+ PRDF_ERR( PRDF_FUNC "cleanup() failed" );
}
return o_rc;
@@ -305,7 +297,23 @@ class VcmEvent : public TdEntry
* @param io_sc The step code data struct.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
- uint32_t cleanup( STEP_CODE_DATA_STRUCT & io_sc );
+    uint32_t cleanup( STEP_CODE_DATA_STRUCT & io_sc )
+    {
+        #define PRDF_FUNC "[VcmEvent::cleanup] "
+
+        // All of the post-chip-mark work is delegated to the common
+        // MarkStore cleanup routine; this wrapper only traces a failure.
+        const uint32_t l_rc = MarkStore::chipMarkCleanup<T>( iv_chip, iv_rank,
+                                                             io_sc );
+        if ( SUCCESS != l_rc )
+        {
+            PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed",
+                      iv_chip->getHuid(), iv_rank.getKey() );
+        }
+
+        return l_rc;
+
+        #undef PRDF_FUNC
+    }
/**
* @brief Verification failed. Do additional processing such as removing
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
index 69e8a26e6..d3de6d20d 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C
@@ -83,46 +83,6 @@ uint32_t VcmEvent<T>::falseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-uint32_t VcmEvent<T>::cleanup( STEP_CODE_DATA_STRUCT & io_sc )
-{
- #define PRDF_FUNC "[VcmEvent::cleanup] "
-
- uint32_t o_rc = SUCCESS;
-
- do
- {
- // If there is a symbol mark on the same DRAM as the newly verified chip
- // mark, remove the symbol mark.
- o_rc = MarkStore::balance<T>( iv_chip, iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "MarkStore::balance(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
-
- // Set the dram in DRAM Repairs VPD.
- o_rc = setDramInVpd<T>( iv_chip, iv_rank, iv_mark.getSymbol() );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // Add a DRAM sparing procedure to the queue, if supported.
- // TODO: RTC 157888
-
- } while (0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-template<TARGETING::TYPE T>
bool __iueCheck( uint32_t i_eccAttns );
template<> inline
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C
index 64f760486..b7cb13653 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_rt.C
@@ -62,129 +62,6 @@ VcmFalseAlarm * __getFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip )
//##############################################################################
//
-// Generic template functions
-//
-//##############################################################################
-
-template<TARGETING::TYPE T>
-uint32_t VcmEvent<T>::falseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
-{
- #define PRDF_FUNC "[VcmEvent::falseAlarm] "
-
- uint32_t o_rc = SUCCESS;
-
- PRDF_TRAC( PRDF_FUNC "Chip mark false alarm: 0x%08x,0x%02x",
- iv_chip->getHuid(), getKey() );
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_VcmFalseAlarm );
-
- do
- {
- // If DRAM repairs are disabled, make the error log predictive.
- if ( areDramRepairsDisabled() )
- {
- io_sc.service_data->setServiceCall();
- break; // Nothing more to do.
- }
-
- // Increment the false alarm counter and check threshold.
- uint8_t dram = iv_mark.getSymbol().getDram();
- if ( __getFalseAlarmCounter<T>(iv_chip)->inc(iv_rank, dram, io_sc) )
- {
- // False alarm threshold has been reached.
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_VcmFalseAlarmTH );
-
- PRDF_TRAC( PRDF_FUNC "False alarm threshold: 0x%08x,0x%02x",
- iv_chip->getHuid(), getKey() );
-
- // Leave the chip mark in place and do any necessary cleanup.
- o_rc = cleanup( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "cleanup() failed" );
- break;
- }
- }
- else
- {
- // Remove the chip mark.
- o_rc = MarkStore::clearChipMark<T>( iv_chip, iv_rank );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
- }
-
- } while (0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-template<TARGETING::TYPE T>
-uint32_t VcmEvent<T>::cleanup( STEP_CODE_DATA_STRUCT & io_sc )
-{
- #define PRDF_FUNC "[VcmEvent::cleanup] "
-
- uint32_t o_rc = SUCCESS;
-
- do
- {
- // If there is a symbol mark on the same DRAM as the newly verified chip
- // mark, remove the symbol mark.
- o_rc = MarkStore::balance<T>( iv_chip, iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "MarkStore::balance(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
-
- // Set the dram in DRAM Repairs VPD.
- o_rc = setDramInVpd<T>( iv_chip, iv_rank, iv_mark.getSymbol() );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setDramInVpd(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // Add a DRAM sparing procedure to the queue, if supported.
- // TODO: RTC 157888
-
- // The cleanup() function is called by both verified() and falseAlarm().
- // In either case, we can pass in the DRAM characterized by iv_mark to
- // determine if there has been a least one false alarm on any DRAM on
- // this rank other than this DRAM. If so, the error log should be
- // predictive.
- VcmFalseAlarm * faCntr = __getFalseAlarmCounter<T>(iv_chip);
- uint8_t dram = iv_mark.getSymbol().getDram();
- if ( faCntr->queryDrams(iv_rank, dram, io_sc) )
- io_sc.service_data->setServiceCall();
-
- } while (0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-// Avoid linker errors with the template.
-template class VcmEvent<TYPE_MCA>;
-template class VcmEvent<TYPE_MBA>;
-
-//##############################################################################
-//
// Specializations for MCA
//
//##############################################################################
@@ -288,6 +165,42 @@ uint32_t VcmEvent<TYPE_MCA>::checkEcc( const uint32_t & i_eccAttns,
#undef PRDF_FUNC
}
+//------------------------------------------------------------------------------
+
+/**
+ * @brief  MCA (Nimbus) cleanup with the chip mark left in place.
+ *
+ * Runs the common chip mark cleanup, then makes the error log predictive if
+ * the false alarm counter reports a false alarm on another DRAM of this rank.
+ *
+ * @param  io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<>
+uint32_t VcmEvent<TYPE_MCA>::cleanup( STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[VcmEvent::cleanup] "
+
+    const uint32_t l_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( iv_chip,
+                                                                iv_rank,
+                                                                io_sc );
+    if ( SUCCESS != l_rc )
+    {
+        PRDF_ERR( PRDF_FUNC "chipMarkCleanup(0x%08x,0x%02x) failed",
+                  iv_chip->getHuid(), iv_rank.getKey() );
+        return l_rc;
+    }
+
+    // Both verified() and falseAlarm() end up here. Either way, the error log
+    // should be predictive if there has been at least one false alarm on any
+    // DRAM of this rank other than the one carrying the chip mark. Nimbus
+    // needs this because of two symbol correction, which Centaur does not
+    // have.
+    VcmFalseAlarm * l_counter = __getFalseAlarmCounter<TYPE_MCA>(iv_chip);
+    const uint8_t l_markedDram = iv_mark.getSymbol().getDram();
+    if ( l_counter->queryDrams(iv_rank, l_markedDram, io_sc) )
+    {
+        io_sc.service_data->setServiceCall();
+    }
+
+    return l_rc;
+
+    #undef PRDF_FUNC
+}
+
//##############################################################################
//
// Specializations for MBA
@@ -400,6 +313,79 @@ uint32_t VcmEvent<TYPE_MBA>::checkEcc( const uint32_t & i_eccAttns,
#undef PRDF_FUNC
}
+//##############################################################################
+//
+// Generic template functions
+//
+//##############################################################################
+
+/**
+ * @brief  Handles a chip mark that failed verification (false alarm).
+ *
+ * Sets the false alarm signature, then:
+ *  - If DRAM repairs are disabled, makes the error log predictive.
+ *  - Otherwise increments the false alarm counter for the marked DRAM. When
+ *    the counter reports threshold, the chip mark is left in place and
+ *    cleanup() is run; when it does not, the chip mark is removed.
+ *
+ * @param  io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t VcmEvent<T>::falseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
+{
+    #define PRDF_FUNC "[VcmEvent::falseAlarm] "
+
+    PRDF_TRAC( PRDF_FUNC "Chip mark false alarm: 0x%08x,0x%02x",
+               iv_chip->getHuid(), getKey() );
+
+    io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                      PRDFSIG_VcmFalseAlarm );
+
+    // With DRAM repairs disabled the only action is a predictive error log.
+    if ( areDramRepairsDisabled() )
+    {
+        io_sc.service_data->setServiceCall();
+        return SUCCESS;
+    }
+
+    // Count this false alarm against the marked DRAM and check threshold.
+    const uint8_t l_dram = iv_mark.getSymbol().getDram();
+    const bool l_atThreshold =
+        __getFalseAlarmCounter<T>(iv_chip)->inc( iv_rank, l_dram, io_sc );
+
+    if ( !l_atThreshold )
+    {
+        // Below threshold: remove the chip mark.
+        const uint32_t l_clearRc = MarkStore::clearChipMark<T>( iv_chip,
+                                                                iv_rank );
+        if ( SUCCESS != l_clearRc )
+        {
+            PRDF_ERR( PRDF_FUNC "clearChipMark(0x%08x,0x%02x) failed",
+                      iv_chip->getHuid(), getKey() );
+        }
+        return l_clearRc;
+    }
+
+    // False alarm threshold has been reached.
+    io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                      PRDFSIG_VcmFalseAlarmTH );
+
+    PRDF_TRAC( PRDF_FUNC "False alarm threshold: 0x%08x,0x%02x",
+               iv_chip->getHuid(), getKey() );
+
+    // Keep the chip mark and do any necessary cleanup.
+    const uint32_t l_cleanupRc = cleanup( io_sc );
+    if ( SUCCESS != l_cleanupRc )
+    {
+        PRDF_ERR( PRDF_FUNC "cleanup() failed" );
+    }
+    return l_cleanupRc;
+
+    #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+// Avoid linker errors with the template.
+// Explicitly instantiates VcmEvent for both supported target types so that
+// the member functions defined in this source file are generated for each.
+template class VcmEvent<TYPE_MCA>;
+template class VcmEvent<TYPE_MBA>;
+
//------------------------------------------------------------------------------
} // end namespace PRDF
OpenPOWER on IntegriCloud