summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Caleb Palmer <cnpalmer@us.ibm.com> 2019-09-25 14:31:16 -0500
committer: Zane C Shelley <zshelle@us.ibm.com> 2019-10-04 12:57:48 -0500
commit: e017df4c986601fab97f6c5a5c488a0ffeef12e1 (patch)
tree: c4aa3d6fc6bef1e08693ded703cbc1b107f1d955 /src
parent: 195dd6a1589e10d837d8d626da72e7c66dbad58d (diff)
download: talos-hostboot-e017df4c986601fab97f6c5a5c488a0ffeef12e1.tar.gz
talos-hostboot-e017df4c986601fab97f6c5a5c488a0ffeef12e1.zip
PRD: Axone Two Phase Scrub (TPS) support
Change-Id: If2540907b7ceb51612b0a68142d54836b89f1e13
RTC: 208263
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84537
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Brian J Stegmiller <bjs@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84783
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C 97
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C 1153
2 files changed, 196 insertions, 1054 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
index 6aaa2702a..d9ebc24a5 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C
@@ -295,53 +295,6 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd()
#undef PRDF_FUNC
}
-template<>
-uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd()
-{
- #define PRDF_FUNC "[TpsEvent::startCmd] "
-
- uint32_t o_rc = SUCCESS;
-
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
- // We don't need to set any stop-on-error conditions or thresholds for
- // soft/inter/hard CEs during Memory Diagnostics. The design is to let the
- // command continue to the end of the rank and we do diagnostics on the
- // CE counts found in the per-symbol counters. Therefore, all we need to do
- // is tell the hardware which CE types to count.
-
- mss::mcbist::stop_conditions stopCond;
-
- switch ( iv_phase )
- {
- case TD_PHASE_1:
- // Set the per symbol counters to count only soft/inter CEs.
- stopCond.set_nce_soft_symbol_count_enable( mss::ON);
- stopCond.set_nce_inter_symbol_count_enable(mss::ON);
- break;
-
- case TD_PHASE_2:
- // Set the per symbol counters to count only hard CEs.
- stopCond.set_nce_hard_symbol_count_enable(mss::ON);
- break;
-
- default: PRDF_ASSERT( false ); // invalid phase
- }
-
- // Start the time based scrub procedure on this slave rank.
- o_rc = startTdScrub<TYPE_MCA>( iv_chip, iv_rank, SLAVE_RANK, stopCond );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed",
- iv_chip->getHuid(), getKey() );
- }
- */
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
//##############################################################################
//
// Specializations for MBA
@@ -415,6 +368,56 @@ uint32_t TpsEvent<TYPE_MBA>::startCmd()
#undef PRDF_FUNC
}
+//##############################################################################
+//
+// Specializations for OCMB
+//
+//##############################################################################
+
+template<>
+uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd()
+{
+ #define PRDF_FUNC "[TpsEvent::startCmd] "
+
+ uint32_t o_rc = SUCCESS;
+
+ // We don't need to set any stop-on-error conditions or thresholds for
+ // soft/inter/hard CEs during Memory Diagnostics. The design is to let the
+ // command continue to the end of the rank and we do diagnostics on the
+ // CE counts found in the per-symbol counters. Therefore, all we need to do
+ // is tell the hardware which CE types to count.
+
+ mss::mcbist::stop_conditions<> stopCond;
+
+ switch ( iv_phase )
+ {
+ case TD_PHASE_1:
+ // Set the per symbol counters to count only soft/inter CEs.
+ stopCond.set_nce_soft_symbol_count_enable( mss::ON);
+ stopCond.set_nce_inter_symbol_count_enable(mss::ON);
+ break;
+
+ case TD_PHASE_2:
+ // Set the per symbol counters to count only hard CEs.
+ stopCond.set_nce_hard_symbol_count_enable(mss::ON);
+ break;
+
+ default: PRDF_ASSERT( false ); // invalid phase
+ }
+
+ // Start the time based scrub procedure on this slave rank.
+ o_rc = startTdScrub<TYPE_OCMB_CHIP>(iv_chip, iv_rank, SLAVE_RANK, stopCond);
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed",
+ iv_chip->getHuid(), getKey() );
+ }
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
//------------------------------------------------------------------------------
// Avoid linker errors with the template.
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
index e5b3ef74f..f7023c07e 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
@@ -54,6 +54,13 @@ static const char *mcbCeStatReg[CE_REGS_PER_PORT] =
"MCB_MBSSYMEC6", "MCB_MBSSYMEC7", "MCB_MBSSYMEC8"
};
+static const char *ocmbCeStatReg[CE_REGS_PER_PORT] =
+ {
+ "OCMB_MBSSYMEC0", "OCMB_MBSSYMEC1", "OCMB_MBSSYMEC2",
+ "OCMB_MBSSYMEC3", "OCMB_MBSSYMEC4", "OCMB_MBSSYMEC5",
+ "OCMB_MBSSYMEC6", "OCMB_MBSSYMEC7", "OCMB_MBSSYMEC8"
+ };
+
//------------------------------------------------------------------------------
template <TARGETING::TYPE T>
@@ -80,6 +87,23 @@ TpsFalseAlarm * __getTpsFalseAlarmCounter<TYPE_MBA>( ExtensibleChip * i_chip )
//------------------------------------------------------------------------------
+template <TARGETING::TYPE T>
+void __maskMainlineNceTces( ExtensibleChip * i_chip );
+
+template<>
+void __maskMainlineNceTces<TYPE_MCA>( ExtensibleChip * i_chip )
+{
+ getMcaDataBundle(i_chip)->iv_maskMainlineNceTce = true;
+}
+
+template<>
+void __maskMainlineNceTces<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip )
+{
+ getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true;
+}
+
+//------------------------------------------------------------------------------
+
template<TARGETING::TYPE T>
void __getNextPhase( ExtensibleChip * i_chip, const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc,
@@ -105,12 +129,7 @@ void __getNextPhase( ExtensibleChip * i_chip, const MemRank & i_rank,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-bool __badDqCount( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badDqCount );
-
-template<>
-bool __badDqCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badDqCount )
+bool __badDqCount(MemUtils::MaintSymbols i_nibbleStats, CeCount & io_badDqCount)
{
bool badDqFound = false;
@@ -145,102 +164,13 @@ bool __badDqCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats,
return badDqFound;
}
-template<>
-bool __badDqCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badDqCount )
-{
- bool badDqFound = false;
-
- PRDF_ERR( "__badDqCount: Function not supported yet" );
- /* TODO RTC 208263
- for ( auto symData : i_nibbleStats )
- {
- // If one of the four symbols has a count of at least 8.
- if ( symData.count >= 8 )
- {
- // And the sum of the other three symbols is 1 or less.
- uint8_t sum = 0;
- for ( auto sumCheck : i_nibbleStats)
- {
- if ( !(symData.symbol == sumCheck.symbol) )
- {
- // Check for overflow.
- if ( (sum + sumCheck.count) > 0xFF )
- sum = 0xFF;
- else
- sum += sumCheck.count;
- }
- }
- if ( sum <= 1 )
- {
- io_badDqCount.count++;
- io_badDqCount.symList.push_back(symData);
- badDqFound = true;
- break;
- }
- }
- }
- */
-
- return badDqFound;
-}
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
bool __badChipCount( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badChipCount );
-
-template<>
-bool __badChipCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badChipCount )
-{
- bool badChipFound = false;
- uint8_t nonZeroCount = 0;
- uint8_t minCountTwo = 0;
- uint8_t sum = 0;
- MemUtils::SymbolData highSym;
-
- for ( auto symData : i_nibbleStats )
- {
- // Check for overflow.
- if ( (sum + symData.count) > 0xFF )
- sum = 0xFF;
- else
- sum += symData.count;
-
- if ( symData.count > 0 )
- nonZeroCount++;
- if ( symData.count >= 2 )
- minCountTwo++;
- if ( symData.count > highSym.count )
- highSym = symData;
- }
-
- // If the total sum for all four symbols has a count of at least 5
- if ( sum >= 5 )
- {
- // And either:
- // 3 or more symbols have a non-zero value.
- // or 2 symbols, both with a minimum count of 2.
- if ( nonZeroCount >= 3 || minCountTwo >= 2 )
- {
- io_badChipCount.count++;
- io_badChipCount.symList.push_back(highSym);
- badChipFound = true;
- }
- }
-
- return badChipFound;
-}
-
-template<>
-bool __badChipCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_badChipCount )
+ CeCount & io_badChipCount )
{
bool badChipFound = false;
-
- PRDF_ERR( "__badChipCount: Function not supported yet" );
- /* TODO RTC 208263
uint8_t nonZeroCount = 0;
uint8_t minCountTwo = 0;
uint8_t sum = 0;
@@ -275,7 +205,6 @@ bool __badChipCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
badChipFound = true;
}
}
- */
return badChipFound;
}
@@ -284,43 +213,8 @@ bool __badChipCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
template<TARGETING::TYPE T>
void __sumAboveOneCount( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_sumAboveOneCount );
-
-template<>
-void __sumAboveOneCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_sumAboveOneCount )
-{
- uint8_t sum = 0;
- MemUtils::MaintSymbols symList;
- for ( auto symData : i_nibbleStats )
- {
- if ( symData.count > 0 )
- {
- if ( (sum + symData.count) > 0xFF )
- sum = 0xFF;
- else
- sum += symData.count;
-
- symList.push_back(symData);
- }
- }
- // If the sum is greater than 1
- if ( sum > 1 )
- {
- io_sumAboveOneCount.count++;
- for ( auto sym : symList )
- {
- io_sumAboveOneCount.symList.push_back(sym);
- }
- }
-}
-
-template<>
-void __sumAboveOneCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_sumAboveOneCount )
+ CeCount & io_sumAboveOneCount )
{
- PRDF_ERR( "__sumAboveOneCount: Function not supported yet" );
- /* TODO RTC 208263
uint8_t sum = 0;
MemUtils::MaintSymbols symList;
for ( auto symData : i_nibbleStats )
@@ -344,47 +238,14 @@ void __sumAboveOneCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
io_sumAboveOneCount.symList.push_back(sym);
}
}
- */
}
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
void __singleSymbolCount( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_singleSymCount );
-
-template<>
-void __singleSymbolCount<TYPE_MCA>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_singleSymCount )
-{
- uint8_t count = 0;
- bool multNonZeroSyms = false;
-
- for ( auto symData : i_nibbleStats )
- {
- if ( symData.count > 0 )
- {
- if ( 0 != count )
- {
- // There are more than one symbol counts that are non-zero
- multNonZeroSyms = true;
- break;
- }
- count = symData.count;
- }
- }
-
- // If there is only one symbol with a non-zero count and that count > 1
- if ( count > 1 && !multNonZeroSyms )
- io_singleSymCount.count++;
-}
-
-template<>
-void __singleSymbolCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
- CeCount & io_singleSymCount )
+ CeCount & io_singleSymCount )
{
- PRDF_ERR( "__singleSymbolCount: Function not supported yet" );
- /* TODO RTC 208263
uint8_t count = 0;
bool multNonZeroSyms = false;
@@ -405,7 +266,6 @@ void __singleSymbolCount<TYPE_OCMB_CHIP>( MemUtils::MaintSymbols i_nibbleStats,
// If there is only one symbol with a non-zero count and that count > 1
if ( count > 1 && !multNonZeroSyms )
io_singleSymCount.count++;
- */
}
//------------------------------------------------------------------------------
@@ -469,12 +329,12 @@ uint32_t __updateVpdSumAboveOne( CeCount i_sumAboveOneCount,
//------------------------------------------------------------------------------
-template <>
-uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
- STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done )
+template <TARGETING::TYPE T>
+uint32_t TpsEvent<T>::analyzeEccErrors( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done )
{
- #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeEccErrors] "
+ #define PRDF_FUNC "[TpsEvent<T>::analyzeEccErrors] "
uint32_t o_rc = SUCCESS;
@@ -492,7 +352,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
// At this point we don't actually have an address for the UE. The
// best we can do is get the address in which the command stopped.
MemAddr addr;
- o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr );
+ o_rc = getMemMaintAddr<T>( iv_chip, addr );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
@@ -500,8 +360,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
break;
}
- o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr,
- UE_TABLE::SCRUB_UE, io_sc );
+ o_rc = MemEcc::handleMemUe<T>( iv_chip, addr,
+ UE_TABLE::SCRUB_UE, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed",
@@ -511,7 +371,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
// Because of the UE, any further TPS requests will likely have no
// effect. So ban all subsequent requests.
- MemDbUtils::banTps<TYPE_MCA>( iv_chip, addr.getRank() );
+ MemDbUtils::banTps<T>( iv_chip, addr.getRank() );
// Abort this procedure because additional repairs will likely
// not help (also avoids complication of having UE and MPE at
@@ -525,7 +385,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_MaintIUE );
- o_rc = MemEcc::handleMemIue<TYPE_MCA>( iv_chip, iv_rank, io_sc );
+ o_rc = MemEcc::handleMemIue<T>( iv_chip, iv_rank, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed",
@@ -551,8 +411,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_MaintMPE );
- o_rc = MemEcc::handleMpe<TYPE_MCA>( iv_chip, iv_rank,
- UE_TABLE::SCRUB_MPE, io_sc );
+ o_rc = MemEcc::handleMpe<T>( iv_chip, iv_rank,
+ UE_TABLE::SCRUB_MPE, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed",
@@ -573,169 +433,51 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
}
-//------------------------------------------------------------------------------
-
-template <>
+template
+uint32_t TpsEvent<TYPE_MCA>::analyzeEccErrors( const uint32_t & i_eccAttns,
+ STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done );
+template
uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeEccErrors(const uint32_t & i_eccAttns,
STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done)
-{
- #define PRDF_FUNC "[TpsEvent<TYPE_OCMB_CHIP>::analyzeEccErrors] "
-
- uint32_t o_rc = SUCCESS;
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
- do
- {
- // If there was a UE.
- if ( i_eccAttns & MAINT_UE )
- {
- PRDF_TRAC( PRDF_FUNC "UE Detected: 0x%08x,0x%02x",
- iv_chip->getHuid(), getKey() );
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_MaintUE );
-
- // At this point we don't actually have an address for the UE. The
- // best we can do is get the address in which the command stopped.
- MemAddr addr;
- o_rc = getMemMaintAddr<TYPE_MCA>( iv_chip, addr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- o_rc = MemEcc::handleMemUe<TYPE_MCA>( iv_chip, addr,
- UE_TABLE::SCRUB_UE, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMemUe(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
-
- // Because of the UE, any further TPS requests will likely have no
- // effect. So ban all subsequent requests.
- MemDbUtils::banTps<TYPE_MCA>( iv_chip, addr.getRank() );
-
- // Abort this procedure because additional repairs will likely
- // not help (also avoids complication of having UE and MPE at
- // the same time).
- o_done = true; break;
- }
-
- // If there was an IUE (MNFG only).
- if ( mfgMode() && (i_eccAttns & MAINT_IUE) )
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_MaintIUE );
-
- o_rc = MemEcc::handleMemIue<TYPE_MCA>( iv_chip, iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMemIue(0x%08x,0x%02x) failed",
- iv_chip->getHuid(), getKey() );
- break;
- }
-
- // If service call is set, then IUE threshold was reached.
- if ( io_sc.service_data->queryServiceCall() )
- {
- PRDF_TRAC( PRDF_FUNC "IUE threshold detected: 0x%08x,0x%02x",
- iv_chip->getHuid(), getKey() );
-
- // Abort this procedure because port failure will be triggered
- // after analysis is complete.
- o_done = true; break;
- }
- }
-
- // If there was an MPE.
- if ( i_eccAttns & MAINT_MPE )
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_MaintMPE );
-
- o_rc = MemEcc::handleMpe<TYPE_MCA>( iv_chip, iv_rank,
- UE_TABLE::SCRUB_MPE, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMpe<T>(0x%08x, 0x%02x) failed",
- iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // Abort this procedure because the chip mark may have fixed the
- // symbol that triggered TPS
- o_done = true; break;
- }
-
- }while(0);
- */
-
- return o_rc;
-
- #undef PRDF_FUNC
-
-}
+ bool & o_done);
//------------------------------------------------------------------------------
-template<>
-uint32_t TpsEvent<TYPE_MCA>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
+template<TARGETING::TYPE T>
+uint32_t TpsEvent<T>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
{
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsFalseAlarm );
// Increase false alarm counter and check threshold.
- if ( __getTpsFalseAlarmCounter<TYPE_MCA>(iv_chip)->inc( iv_rank, io_sc) )
+ if ( __getTpsFalseAlarmCounter<T>(iv_chip)->inc( iv_rank, io_sc) )
{
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsFalseAlarmTH );
// Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
return SUCCESS;
}
-//------------------------------------------------------------------------------
-
-template<>
+template
+uint32_t TpsEvent<TYPE_MCA>::handleFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc );
+template
uint32_t TpsEvent<TYPE_OCMB_CHIP>::handleFalseAlarm(
- STEP_CODE_DATA_STRUCT & io_sc )
-{
- PRDF_ERR( "handleFalseAlarm: Function not supported yet" );
- /* TODO RTC 208263
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsFalseAlarm );
-
- // Increase false alarm counter and check threshold.
- if ( __getTpsFalseAlarmCounter<TYPE_MCA>(iv_chip)->inc( iv_rank, io_sc) )
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsFalseAlarmTH );
-
- // Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- */
-
- return SUCCESS;
-}
+ STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
-template<>
-uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
+template<TARGETING::TYPE T>
+uint32_t TpsEvent<T>::analyzeCeSymbolCounts( CeCount i_badDqCount,
CeCount i_badChipCount, CeCount i_sumAboveOneCount,
CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc )
{
- #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts] "
+ #define PRDF_FUNC "[TpsEvent<T>::analyzeCeSymbolCounts] "
uint32_t o_rc = SUCCESS;
@@ -744,33 +486,33 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
bool tpsFalseAlarm = false;
// Get the Bad DQ Bitmap.
- TargetHandle_t mcaTrgt = iv_chip->getTrgt();
+ TargetHandle_t trgt = iv_chip->getTrgt();
MemDqBitmap dqBitmap;
- o_rc = getBadDqBitmap( mcaTrgt, iv_rank, dqBitmap );
+ o_rc = getBadDqBitmap( trgt, iv_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed",
- getHuid(mcaTrgt), iv_rank.getKey() );
+ getHuid(trgt), iv_rank.getKey() );
break;
}
// Get the symbol mark.
MemMark symMark;
- o_rc = MarkStore::readSymbolMark<TYPE_MCA>( iv_chip, iv_rank, symMark );
+ o_rc = MarkStore::readSymbolMark<T>( iv_chip, iv_rank, symMark );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x, 0x%02x) "
+ PRDF_ERR( PRDF_FUNC "readSymbolMark<T>(0x%08x, 0x%02x) "
"failed", iv_chip->getHuid(), iv_rank.getKey() );
break;
}
// Get the chip mark.
MemMark chipMark;
- o_rc = MarkStore::readChipMark<TYPE_MCA>( iv_chip, iv_rank, chipMark );
+ o_rc = MarkStore::readChipMark<T>( iv_chip, iv_rank, chipMark );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x, 0x%02x) "
+ PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x, 0x%02x) "
"failed", iv_chip->getHuid(), iv_rank.getKey() );
break;
}
@@ -799,9 +541,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// TCE. Both are still correctable after a symbol mark
// is placed.
// Place a symbol mark on this bad DQ.
- MemMark newSymMark( mcaTrgt, iv_rank,
+ MemMark newSymMark( trgt, iv_rank,
i_badDqCount.symList[0].symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
+ o_rc = MarkStore::writeSymbolMark<T>( iv_chip,
iv_rank, newSymMark );
if ( SUCCESS != o_rc )
{
@@ -839,7 +581,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
}
else
@@ -853,7 +595,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
else if ( 2 == i_badDqCount.count && 0 == i_badChipCount.count )
{
// Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
// If the symbol mark is available.
if ( !symMark.isValid() )
@@ -874,9 +616,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
highSym = sym;
}
- MemMark newSymMark( mcaTrgt, iv_rank,
+ MemMark newSymMark( trgt, iv_rank,
highSym.symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
+ o_rc = MarkStore::writeSymbolMark<T>( iv_chip,
iv_rank, newSymMark );
if ( SUCCESS != o_rc )
{
@@ -956,10 +698,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// This means we have only one more potential bad DQ, which
// is still correctable after a chip mark is placed.
// Place a chip mark on this bad chip.
- MemMark newChipMark( mcaTrgt, iv_rank,
+ MemMark newChipMark( trgt, iv_rank,
i_badChipCount.symList[0].symbol );
- o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank,
- newChipMark );
+ o_rc = MarkStore::writeChipMark<T>( iv_chip, iv_rank,
+ newChipMark );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
@@ -995,7 +737,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
}
else
@@ -1018,7 +760,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
// If the chip mark is available.
if ( !chipMark.isValid() )
@@ -1029,10 +771,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// This means we have no more potential bad DQ or bad chips
// since we can't correct those after chip mark is placed.
// Place a chip mark on the bad chip.
- MemMark newChipMark( mcaTrgt, iv_rank,
+ MemMark newChipMark( trgt, iv_rank,
i_badChipCount.symList[0].symbol );
- o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank,
- newChipMark );
+ o_rc = MarkStore::writeChipMark<T>( iv_chip, iv_rank,
+ newChipMark );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
@@ -1050,8 +792,8 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// this chip mark, we need to clear the symbol mark now
// instead of at the end of the function to make room
// for the additional symbol mark.
- o_rc = MarkStore::clearSymbolMark<TYPE_MCA>( iv_chip,
- iv_rank );
+ o_rc = MarkStore::clearSymbolMark<T>( iv_chip,
+ iv_rank );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "MarkStore::clearSymbolMark("
@@ -1097,7 +839,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
}
// If the symbol mark is available.
@@ -1109,9 +851,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// This means we have no more potential bad DQ or bad chips
// since we can't correct those after symbol mark is placed.
// Place a symbol mark on this bad DQ.
- MemMark newSymMark( mcaTrgt, iv_rank,
+ MemMark newSymMark( trgt, iv_rank,
i_badDqCount.symList[0].symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
+ o_rc = MarkStore::writeSymbolMark<T>( iv_chip,
iv_rank, newSymMark );
if ( SUCCESS != o_rc )
{
@@ -1152,7 +894,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
}
@@ -1175,7 +917,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
io_sc.service_data->setServiceCall();
// Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
+ __maskMainlineNceTces<T>( iv_chip );
}
// If analysis resulted in a false alarm.
@@ -1190,18 +932,18 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
}
// Write any updates to VPD.
- o_rc = setBadDqBitmap( mcaTrgt, iv_rank, dqBitmap );
+ o_rc = setBadDqBitmap( trgt, iv_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed",
- getHuid(mcaTrgt), iv_rank.getKey() );
+ getHuid(trgt), iv_rank.getKey() );
break;
}
// We may have placed a chip mark so do any necessary cleanup. This must
// be called after writing the bad DQ bitmap because the this function
// will also write it if necessary.
- o_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( iv_chip, iv_rank, io_sc );
+ o_rc = MarkStore::chipMarkCleanup<T>( iv_chip, iv_rank, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "MarkStore::chipMarkCleanup(0x%08x,0x%02x) "
@@ -1216,496 +958,14 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
#undef PRDF_FUNC
}
-//------------------------------------------------------------------------------
-
-template<>
+template
+uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
+ CeCount i_badChipCount, CeCount i_sumAboveOneCount,
+ CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc );
+template
uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeCeSymbolCounts( CeCount i_badDqCount,
CeCount i_badChipCount, CeCount i_sumAboveOneCount,
- CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc )
-{
-
- #define PRDF_FUNC "[TpsEvent<TYPE_OCMB_CHIP>::analyzeCeSymbolCounts] "
-
- uint32_t o_rc = SUCCESS;
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
- do
- {
- bool tpsFalseAlarm = false;
-
- // Get the Bad DQ Bitmap.
- TargetHandle_t mcaTrgt = iv_chip->getTrgt();
- MemDqBitmap dqBitmap;
-
- o_rc = getBadDqBitmap( mcaTrgt, iv_rank, dqBitmap );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed",
- getHuid(mcaTrgt), iv_rank.getKey() );
- break;
- }
-
- // Get the symbol mark.
- MemMark symMark;
- o_rc = MarkStore::readSymbolMark<TYPE_MCA>( iv_chip, iv_rank, symMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x, 0x%02x) "
- "failed", iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // Get the chip mark.
- MemMark chipMark;
- o_rc = MarkStore::readChipMark<TYPE_MCA>( iv_chip, iv_rank, chipMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x, 0x%02x) "
- "failed", iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // If the bad DQ nibble count is 0 and the bad chip nibble count is 0.
- if ( 0 == i_badDqCount.count && 0 == i_badChipCount.count )
- {
- // There is nothing to repair. Any other non-zero counts are
- // considered acceptable noise.
- // Set false alarm flag to true.
- tpsFalseAlarm = true;
- }
- // If the bad DQ nibble count is 1 and the bad chip nibble count is 0.
- else if ( 1 == i_badDqCount.count && 0 == i_badChipCount.count )
- {
- // If the symbol mark is available.
- if ( !symMark.isValid() )
- {
- // If the sum above one nibble count is <= 1 or sum above one
- // nibble count == 2 and single sym nibble count == 2
- if ( (i_sumAboveOneCount.count <= 1) ||
- (i_sumAboveOneCount.count == 2 &&
- i_singleSymCount.count == 2) )
- {
- // This means we have a potential future chip kill or
- // TCE. Both are still correctable after a symbol mark
- // is placed.
- // Place a symbol mark on this bad DQ.
- MemMark newSymMark( mcaTrgt, iv_rank,
- i_badDqCount.symList[0].symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
- iv_rank, newSymMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymbolMark );
-
- // Update VPD with the symbol mark.
- o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setSymbol failed." );
- break;
- }
- }
- else
- {
- // Placing a symbol mark risks a UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne(i_sumAboveOneCount, dqBitmap);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR(PRDF_FUNC "__updateVpdSumAboveOne() failed.");
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymUeRisk );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- }
- else
- {
- // Otherwise assume the symbol mark is fixing this bad DQ.
- // Set the false alarm flag to true.
- tpsFalseAlarm = true;
- }
- }
- // Else if bad DQ nibble count is 2 and bad chip nibble count is 0.
- else if ( 2 == i_badDqCount.count && 0 == i_badChipCount.count )
- {
- // Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
-
- // If the symbol mark is available.
- if ( !symMark.isValid() )
- {
- // If the sum above one nibble count is = 0 or sum above one
- // nibble count = 1 and single sym nibble count = 1
- if ( (i_sumAboveOneCount.count == 0) ||
- (i_sumAboveOneCount.count == 1 &&
- i_singleSymCount.count == 1) )
- {
- // This means we have only one more potential bad DQ, which
- // is correctable after a symbol mark is placed.
- // Place a symbol mark on this bad DQ with the highest count
- MemUtils::SymbolData highSym;
- for ( auto sym : i_badDqCount.symList )
- {
- if ( sym.count > highSym.count )
- highSym = sym;
- }
-
- MemMark newSymMark( mcaTrgt, iv_rank,
- highSym.symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
- iv_rank, newSymMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymbolMark );
-
- // Update VPD with both symbols.
- for ( auto sym : i_badDqCount.symList )
- {
- o_rc = dqBitmap.setSymbol( sym.symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setSymbol failed." );
- break;
- }
- }
- if ( SUCCESS != o_rc ) break;
- }
- else
- {
- // Placing a symbol mark risks a UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne(i_sumAboveOneCount, dqBitmap);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR(PRDF_FUNC "__updateVpdSumAboveOne() failed.");
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymUeRisk );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
- }
-
- }
- else
- {
- // Otherwise assume the symbol mark is fixing a bad DQ.
- // Update VPD with the unrepaired symbol.
- for ( auto sym : i_badDqCount.symList )
- {
- if ( sym.symbol == symMark.getSymbol() ) continue;
-
- o_rc = dqBitmap.setSymbol( sym.symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setSymbol failed." );
- break;
- }
- }
- if ( SUCCESS != o_rc ) break;
-
- // Set the false alarm flag to true.
- tpsFalseAlarm = true;
- }
-
- }
- // Else if bad DQ nibble count is 0 and bad chip nibble count is 1
- else if ( 0 == i_badDqCount.count && 1 == i_badChipCount.count )
- {
- // If the chip mark is available.
- if ( !chipMark.isValid() )
- {
- // If the sum above one nibble count is = 0 or the sum above one
- // nibble count = 1 and the single sym nibble count = 1
- if ( (i_sumAboveOneCount.count == 0) ||
- (i_sumAboveOneCount.count == 1 &&
- i_singleSymCount.count == 1) )
- {
- // This means we have only one more potential bad DQ, which
- // is still correctable after a chip mark is placed.
- // Place a chip mark on this bad chip.
- MemMark newChipMark( mcaTrgt, iv_rank,
- i_badChipCount.symList[0].symbol );
- o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank,
- newChipMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsChipMark );
- // Update VPD with the chip mark.
- o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setDram failed." );
- break;
- }
- }
- else
- {
- // Placing a mark risks a UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne(i_sumAboveOneCount, dqBitmap);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR(PRDF_FUNC "__updateVpdSumAboveOne() failed.");
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsChipUeRisk );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- }
- else
- {
- // Assume the chip mark is being used to fix the bad chip.
- // Set the false alarm flag to true.
- tpsFalseAlarm = true;
- }
- }
- // Else if bad DQ nibble count is 1 and bad chip nibble count is 1
- else if ( 1 == i_badDqCount.count && 1 == i_badChipCount.count )
- {
- // If neither chip nor symbol mark is available.
- if ( chipMark.isValid() && symMark.isValid() )
- {
- // Assume the chip and symbol marks are already being used to
- // fix the bad chip and DQ and some other nibble under
- // threshold triggered TPS.
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- // If the chip mark is available.
- if ( !chipMark.isValid() )
- {
- // If the sum above one nibble count is 0
- if ( 0 == i_sumAboveOneCount.count )
- {
- // This means we have no more potential bad DQ or bad chips
- // since we can't correct those after chip mark is placed.
- // Place a chip mark on the bad chip.
- MemMark newChipMark( mcaTrgt, iv_rank,
- i_badChipCount.symList[0].symbol );
- o_rc = MarkStore::writeChipMark<TYPE_MCA>( iv_chip, iv_rank,
- newChipMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- // Check if the current symbol mark is on the same DRAM as
- // this newly placed chip mark.
- if ( symMark.isValid() &&
- ( symMark.getSymbol().getDram() ==
- newChipMark.getSymbol().getDram() ) )
- {
- // Since we need to set a symbol mark in addition to
- // this chip mark, we need to clear the symbol mark now
- // instead of at the end of the function to make room
- // for the additional symbol mark.
- o_rc = MarkStore::clearSymbolMark<TYPE_MCA>( iv_chip,
- iv_rank );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "MarkStore::clearSymbolMark("
- "0x%08x,0x%02x) failed",
- iv_chip->getHuid(), iv_rank.getKey() );
- break;
- }
-
- // Now refresh the symMark variable since the mark has
- // been removed.
- symMark = MemMark();
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsChipMark );
-
- // Update VPD with the chip mark.
- o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setDram failed." );
- break;
- }
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
- }
- else
- {
- // Placing a chip mark risks a UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne(i_sumAboveOneCount, dqBitmap);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR(PRDF_FUNC "__updateVpdSumAboveOne() failed.");
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsChipUeRisk );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- }
- // If the symbol mark is available.
- if ( !symMark.isValid() )
- {
- // If the sum above one nibble count is 0
- if ( 0 == i_sumAboveOneCount.count )
- {
- // This means we have no more potential bad DQ or bad chips
- // since we can't correct those after symbol mark is placed.
- // Place a symbol mark on this bad DQ.
- MemMark newSymMark( mcaTrgt, iv_rank,
- i_badDqCount.symList[0].symbol );
- o_rc = MarkStore::writeSymbolMark<TYPE_MCA>( iv_chip,
- iv_rank, newSymMark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymbolMark );
-
- // Update VPD with the symbol mark.
- o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "dqBitmap.setSymbol failed." );
- break;
- }
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
- }
- else
- {
- // Placing the symbol mark risks a UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne(i_sumAboveOneCount, dqBitmap);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR(PRDF_FUNC "__updateVpdSumAboveOne() failed.");
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsSymUeRisk );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
- }
-
- }
- else
- {
- // There are enough errors that this could be a potential UE.
- // For nibbles under threshold with a sum greater than 1,
- // update VPD with it's non-zero symbols.
- o_rc = __updateVpdSumAboveOne( i_sumAboveOneCount, dqBitmap );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "__updateVpdSumAboveOne() failed." );
- }
-
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsPotentialUe );
-
- // Make the error log predictive.
- io_sc.service_data->setServiceCall();
-
- // Permanently mask mainline NCEs and TCEs.
- getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
- }
-
- // If analysis resulted in a false alarm.
- if ( tpsFalseAlarm )
- {
- o_rc = handleFalseAlarm( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleFalseAlarm() failed on 0x%08x, "
- "0x%02x", iv_chip->getHuid(), getKey() );
- }
- }
-
- // Write any updates to VPD.
- o_rc = setBadDqBitmap( mcaTrgt, iv_rank, dqBitmap );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed",
- getHuid(mcaTrgt), iv_rank.getKey() );
- break;
- }
-
- // We may have placed a chip mark so do any necessary cleanup. This must
- // be called after writing the bad DQ bitmap because the this function
- // will also write it if necessary.
- o_rc = MarkStore::chipMarkCleanup<TYPE_MCA>( iv_chip, iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "MarkStore::chipMarkCleanup(0x%08x,0x%02x) "
- "failed", iv_chip->getHuid(), getKey() );
- break;
- }
-
- } while (0);
- */
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
+ CeCount i_singleSymCount, STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
@@ -1818,31 +1078,28 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount,
uint32_t o_rc = SUCCESS;
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
do
{
// Get the Bad DQ Bitmap.
- TargetHandle_t mcaTrgt = iv_chip->getTrgt();
+ TargetHandle_t ocmbTrgt = iv_chip->getTrgt();
MemDqBitmap dqBitmap;
- o_rc = getBadDqBitmap( mcaTrgt, iv_rank, dqBitmap );
+ o_rc = getBadDqBitmap( ocmbTrgt, iv_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x,%d) failed",
- getHuid(mcaTrgt), iv_rank.getMaster() );
+ getHuid(ocmbTrgt), iv_rank.getMaster() );
break;
}
std::vector<MemSymbol> bmSymList = dqBitmap.getSymbolList();
- ExtensibleChip * mcbChip = getConnectedParent( iv_chip, TYPE_MCBIST );
const char * reg_str = nullptr;
SCAN_COMM_REGISTER_CLASS * reg = nullptr;
for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_PORT; regIdx++ )
{
- reg_str = mcbCeStatReg[regIdx];
- reg = mcbChip->getRegister( reg_str );
+ reg_str = ocmbCeStatReg[regIdx];
+ reg = iv_chip->getRegister( reg_str );
o_rc = reg->Read();
if ( SUCCESS != o_rc )
@@ -1864,7 +1121,7 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount,
PRDF_ASSERT( sym < SYMBOLS_PER_RANK );
MemUtils::SymbolData symData;
- symData.symbol = MemSymbol::fromSymbol( mcaTrgt, iv_rank,
+ symData.symbol = MemSymbol::fromSymbol( ocmbTrgt, iv_rank,
sym, CEN_SYMBOL::ODD_SYMBOL_DQ );
if ( !symData.symbol.isValid() )
{
@@ -1887,14 +1144,14 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount,
// Add all symbols with non-zero counts to the callout list.
if ( symData.count != 0 )
{
- MemoryMru mm { mcaTrgt, iv_rank, symData.symbol };
+ MemoryMru mm { ocmbTrgt, iv_rank, symData.symbol };
io_sc.service_data->SetCallout( mm );
}
}
if ( SUCCESS != o_rc ) break;
// Analyze the nibble of symbols.
- __analyzeNibbleSyms<TYPE_MCA>( nibbleStats, io_badDqCount,
+ __analyzeNibbleSyms<TYPE_OCMB_CHIP>( nibbleStats, io_badDqCount,
io_badChipCount, io_sumAboveOneCount, io_singleSymCount );
}
@@ -1902,7 +1159,6 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount,
}
}while(0);
- */
return o_rc;
@@ -1912,11 +1168,11 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::getSymbolCeCounts( CeCount & io_badDqCount,
//------------------------------------------------------------------------------
-template <>
-uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done )
+template <TARGETING::TYPE T>
+uint32_t TpsEvent<T>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done )
{
- #define PRDF_FUNC "[TpsEvent<TYPE_MCA>::analyzeCeStats] "
+ #define PRDF_FUNC "[TpsEvent<T>::analyzeCeStats] "
uint32_t o_rc = SUCCESS;
@@ -1967,71 +1223,18 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc,
}
-//------------------------------------------------------------------------------
-
-template <>
+template
+uint32_t TpsEvent<TYPE_MCA>::analyzeCeStats( STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done );
+template
uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzeCeStats(STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done)
-{
- #define PRDF_FUNC "[TpsEvent<TYPE_OCMB_CHIP>::analyzeCeStats] "
-
- uint32_t o_rc = SUCCESS;
-
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
- do
- {
- // The symbol CE counts will be summarized in the following buckets:
- // Number of nibbles with a bad DQ
- // Number of nibbles with a bad chip
- // Number of nibbles under threshold with a sum greater than 1
- // Number of nibbles under threshold with only a single symbol with a
- // non-zero count, and that count is > 1
- CeCount badDqCount, badChipCount, sumAboveOneCount, singleSymCount;
-
- // Get the symbol CE counts.
- o_rc = getSymbolCeCounts( badDqCount, badChipCount, sumAboveOneCount,
- singleSymCount, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "getSymbolCeCounts failed." );
- break;
- }
-
- // If DRAM repairs are disabled, make the error log predictive and
- // abort this procedure.
- if ( areDramRepairsDisabled() )
- {
- io_sc.service_data->setSignature( iv_chip->getHuid(),
- PRDFSIG_TpsDramDisabled );
-
- io_sc.service_data->setServiceCall();
- break;
- }
-
- // Analyze the symbol CE counts.
- o_rc = analyzeCeSymbolCounts(badDqCount, badChipCount, sumAboveOneCount,
- singleSymCount, io_sc);
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "analyzeCeSymbolCounts failed." );
- break;
- }
-
- }while(0);
- */
-
- return o_rc;
-
- #undef PRDF_FUNC
-
-}
+ bool & o_done);
//------------------------------------------------------------------------------
-template<>
-uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done )
+template<TARGETING::TYPE T>
+uint32_t TpsEvent<T>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done )
{
#define PRDF_FUNC "[TpsEvent::analyzePhase] "
@@ -2043,11 +1246,11 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
// Analyze Ecc Attentions
uint32_t eccAttns;
- o_rc = checkEccFirs<TYPE_MCA>( iv_chip, eccAttns );
+ o_rc = checkEccFirs<T>( iv_chip, eccAttns );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed",
- iv_chip->getHuid() );
+ iv_chip->getHuid() );
break;
}
@@ -2076,7 +1279,7 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
if ( (SUCCESS == o_rc) && o_done )
{
// Clear the ECC FFDC for this master rank.
- MemDbUtils::resetEccFfdc<TYPE_MCA>( iv_chip, iv_rank, SLAVE_RANK );
+ MemDbUtils::resetEccFfdc<T>( iv_chip, iv_rank, SLAVE_RANK );
}
return o_rc;
@@ -2084,66 +1287,36 @@ uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
#undef PRDF_FUNC
}
-//------------------------------------------------------------------------------
-
-template<>
+template
+uint32_t TpsEvent<TYPE_MCA>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_done );
+template
uint32_t TpsEvent<TYPE_OCMB_CHIP>::analyzePhase( STEP_CODE_DATA_STRUCT & io_sc,
- bool & o_done )
-{
- #define PRDF_FUNC "[TpsEvent::analyzePhase] "
-
- uint32_t o_rc = SUCCESS;
+ bool & o_done );
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
- do
- {
- if ( TD_PHASE_0 == iv_phase ) break; // Nothing to analyze yet.
-
- // Analyze Ecc Attentions
- uint32_t eccAttns;
- o_rc = checkEccFirs<TYPE_MCA>( iv_chip, eccAttns );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed",
- iv_chip->getHuid() );
- break;
- }
-
- o_rc = analyzeEccErrors( eccAttns, io_sc, o_done );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "analyzeEccErrors() failed." );
- break;
- }
- if ( o_done ) break;
-
- // Analyze CEs
- o_rc = analyzeCeStats( io_sc, o_done );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "analyzeCeStats() failed." );
- break;
- }
- if ( o_done ) break;
+//------------------------------------------------------------------------------
- // At this point, we are done with the procedure.
- o_done = true;
+template<TARGETING::TYPE T>
+uint32_t TpsEvent<T>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc )
+{
+ uint32_t signature = 0;
- } while (0);
+ __getNextPhase<T>( iv_chip, iv_rank, io_sc, iv_phase, signature );
- if ( (SUCCESS == o_rc) && o_done )
- {
- // Clear the ECC FFDC for this master rank.
- MemDbUtils::resetEccFfdc<TYPE_MCA>( iv_chip, iv_rank, SLAVE_RANK );
- }
- */
+ PRDF_TRAC( "[TpsEvent] Starting TPS Phase %d: 0x%08x,0x%02x",
+ iv_phase, iv_chip->getHuid(), getKey() );
- return o_rc;
+ io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature );
- #undef PRDF_FUNC
+ return startCmd();
}
+template
+uint32_t TpsEvent<TYPE_MCA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t TpsEvent<TYPE_OCMB_CHIP>::startNextPhase(
+ STEP_CODE_DATA_STRUCT & io_sc );
+
//##############################################################################
//
// Specializations for MCA
@@ -2196,6 +1369,12 @@ uint32_t TpsEvent<TYPE_MCA>::startCmd()
#undef PRDF_FUNC
}
+//##############################################################################
+//
+// Specializations for OCMB
+//
+//##############################################################################
+
template<>
uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd()
{
@@ -2203,15 +1382,13 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd()
uint32_t o_rc = SUCCESS;
- PRDF_ERR( PRDF_FUNC "Function not supported yet" );
- /* TODO RTC 208263
// We don't need to set any stop-on-error conditions or thresholds for
// soft/inter/hard CEs at runtime. The design is to let the command continue
// to the end of the rank and we do diagnostics on the CE counts found in
// the per-symbol counters. Therefore, all we need to do is tell the
// hardware which CE types to count.
- mss::mcbist::stop_conditions stopCond;
+ mss::mcbist::stop_conditions<> stopCond;
switch ( iv_phase )
{
@@ -2232,56 +1409,18 @@ uint32_t TpsEvent<TYPE_OCMB_CHIP>::startCmd()
}
// Start the time based scrub procedure on this slave rank.
- o_rc = startTdScrub<TYPE_MCA>( iv_chip, iv_rank, SLAVE_RANK, stopCond );
+ o_rc = startTdScrub<TYPE_OCMB_CHIP>(iv_chip, iv_rank, SLAVE_RANK, stopCond);
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "startTdScrub(0x%08x,0x%2x) failed",
iv_chip->getHuid(), getKey() );
}
- */
return o_rc;
#undef PRDF_FUNC
}
-//------------------------------------------------------------------------------
-
-template<>
-uint32_t TpsEvent<TYPE_MCA>::startNextPhase( STEP_CODE_DATA_STRUCT & io_sc )
-{
- uint32_t signature = 0;
-
- __getNextPhase<TYPE_MCA>( iv_chip, iv_rank, io_sc, iv_phase, signature );
-
- PRDF_TRAC( "[TpsEvent] Starting TPS Phase %d: 0x%08x,0x%02x",
- iv_phase, iv_chip->getHuid(), getKey() );
-
- io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature );
-
- return startCmd();
-}
-
-//------------------------------------------------------------------------------
-
-template<>
-uint32_t TpsEvent<TYPE_OCMB_CHIP>::startNextPhase(STEP_CODE_DATA_STRUCT & io_sc)
-{
- PRDF_ERR( "startNextPhase: Function not supported yet" );
- /* TODO RTC 208263
- uint32_t signature = 0;
-
- __getNextPhase<TYPE_MCA>( iv_chip, iv_rank, io_sc, iv_phase, signature );
-
- PRDF_TRAC( "[TpsEvent] Starting TPS Phase %d: 0x%08x,0x%02x",
- iv_phase, iv_chip->getHuid(), getKey() );
-
- io_sc.service_data->AddSignatureList( iv_chip->getTrgt(), signature );
- */
-
- return startCmd();
-}
-
//##############################################################################
//
// Specializations for MBA
OpenPOWER on IntegriCloud