Diffstat (limited to 'src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C')
-rw-r--r--  src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C  147
1 file changed, 104 insertions, 43 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
index ef3a143eb..fc389000a 100644
--- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
+++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
@@ -99,7 +99,7 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt )
template<TARGETING::TYPE T>
void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt,
- TargetHandle_t i_dimmTrgt )
+ TargetHandle_t i_dimmTrgt, bool i_nvdimmNoGard = false )
{
#define PRDF_FUNC "[RDR::__calloutDimm] "
@@ -109,9 +109,31 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt,
PRDF_ASSERT( nullptr != i_dimmTrgt );
PRDF_ASSERT( TYPE_DIMM == getTargetType(i_dimmTrgt) );
- // Callout the DIMM.
+ HWAS::DeconfigEnum deconfigPolicy = HWAS::DELAYED_DECONFIG;
+ HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_Predictive;
+
+ #ifdef CONFIG_NVDIMM
+ // For the "RDR: All repairs used" case, if the DIMM is an NVDIMM, change
+ // the gard and deconfig options to no gard/deconfig and call
+ // nvdimmNotifyProtChange to indicate a save/restore may work.
+ if ( i_nvdimmNoGard )
+ {
+ deconfigPolicy = HWAS::NO_DECONFIG;
+ gardPolicy = HWAS::GARD_NULL;
+
+ uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( i_dimmTrgt,
+ NVDIMM::NVDIMM_RISKY_HW_ERROR );
+ if ( SUCCESS != l_rc )
+ {
+ PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange(0x%08x) "
+ "failed.", PlatServices::getHuid(i_dimmTrgt) );
+ }
+ }
+ #endif
+
io_errl->addHwCallout( i_dimmTrgt, HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DELAYED_DECONFIG, HWAS::GARD_Predictive );
+ deconfigPolicy, gardPolicy );
+
// Clear the VPD on this DIMM. The DIMM has been garded, but it is possible
// the customer will want to ungard the DIMM. Without clearing the VPD, the
@@ -120,16 +142,20 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt,
// customer takes the risk of ungarding the DIMM (that they should replace),
// the repairs will need to be rediscovered.
- std::vector<MemRank> ranks;
- getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) );
-
- for ( auto & rank : ranks )
+ // Do not clear the VPD if we had an NVDIMM that we avoided garding.
+ if ( !i_nvdimmNoGard )
{
- if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) )
+ std::vector<MemRank> ranks;
+ getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) );
+
+ for ( auto & rank : ranks )
{
- PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed",
- getHuid(i_portTrgt), rank.getKey() );
- continue;
+ if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) )
+ {
+ PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed",
+ getHuid(i_portTrgt), rank.getKey() );
+ continue;
+ }
}
}
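The two hunks above make __calloutDimm() NVDIMM-aware: the gard/deconfig policy is now a parameter, and the repair data in VPD is preserved when an NVDIMM was deliberately left ungarded. The following is a minimal, self-contained model of that pattern; the enum values and helper names are simplified stand-ins, not the Hostboot HWAS/PRD interfaces.

    // Standalone model of the NVDIMM-aware callout: pick the gard/deconfig
    // policy, then skip the repair-data (VPD) clear when the part was
    // intentionally left ungarded. All names are placeholders.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum class Deconfig { Delayed, None };
    enum class Gard     { Predictive, None };

    static bool clearRankRepairData(uint8_t rank)
    {
        // Placeholder for clearBadDqBitmap().
        std::printf("clearing repair data for rank %u\n",
                    static_cast<unsigned>(rank));
        return true;
    }

    static void calloutDimmModel(const std::vector<uint8_t>& ranks,
                                 bool nvdimmNoGard)
    {
        // NVDIMMs are reported but left configured and ungarded so a later
        // save/restore of their contents can still be attempted.
        Deconfig deconfig = nvdimmNoGard ? Deconfig::None : Deconfig::Delayed;
        Gard     gard     = nvdimmNoGard ? Gard::None     : Gard::Predictive;

        std::printf("callout: deconfig=%d gard=%d\n",
                    static_cast<int>(deconfig), static_cast<int>(gard));

        if (nvdimmNoGard)
            return; // DIMM was not garded, so keep its repair data in VPD

        for (uint8_t rank : ranks)
        {
            if (!clearRankRepairData(rank))
                continue; // best effort: one bad rank should not stop the rest
        }
    }

    int main()
    {
        calloutDimmModel({0, 1, 2, 3}, false); // regular DIMM
        calloutDimmModel({0, 1, 2, 3}, true);  // NVDIMM: no gard, keep VPD
        return 0;
    }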
@@ -156,11 +182,7 @@ void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask );
-
-template<>
-bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
- uint8_t i_repairedRankMask )
+bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask )
{
#define PRDF_FUNC "[processRepairedRanks] "
@@ -179,7 +201,7 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
// map value has no significance.
std::map<TargetHandle_t, uint32_t> calloutList;
- ExtensibleChip * mcaChip = (ExtensibleChip *)systemPtr->GetChip(i_trgt);
+ ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip(i_trgt);
for ( uint8_t r = 0; r < MASTER_RANKS_PER_PORT; ++r )
{
@@ -191,20 +213,18 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
MemRank rank ( r );
MemMark cm;
- if ( SUCCESS != MarkStore::readChipMark<TYPE_MCA>( mcaChip, rank,
- cm ) )
+ if ( SUCCESS != MarkStore::readChipMark<T>( chip, rank, cm ) )
{
- PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x,0x%02x) "
- "failed", mcaChip->getHuid(), rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,0x%02x) "
+ "failed", chip->getHuid(), rank.getKey() );
continue; // skip this rank
}
MemMark sm;
- if ( SUCCESS != MarkStore::readSymbolMark<TYPE_MCA>( mcaChip, rank,
- sm ) )
+ if ( SUCCESS != MarkStore::readSymbolMark<T>( chip, rank, sm ) )
{
- PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x,0x%02x) "
- "failed", mcaChip->getHuid(), rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "readSymbolMark<T>(0x%08x,0x%02x) "
+ "failed", chip->getHuid(), rank.getKey() );
continue; // skip this rank
}
@@ -214,9 +234,8 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
if ( NULL == errl )
{
- errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE,
- i_trgt,
- PRDFSIG_RdrRepairsUsed );
+ errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_trgt, PRDFSIG_RdrRepairsUsed );
}
std::vector<MemSymbol> symList;
@@ -246,16 +265,21 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
// Callout all DIMMs in the map.
for ( auto const & dimm : calloutList )
{
- __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm.first );
+ bool nvdimmNoGard = false;
+ #ifdef CONFIG_NVDIMM
+ if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true;
+ #endif
+
+ __calloutDimm<T>( errl, i_trgt, dimm.first, nvdimmNoGard );
}
// Commit the error log, if needed.
- commitErrl<TYPE_MCA>( errl, i_trgt );
+ commitErrl<T>( errl, i_trgt );
// Commit an additional error log indicating something failed in the
// analysis, if needed.
- commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, analysisErrors );
+ commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
+ PRDFSIG_RdrInternalFail, analysisErrors );
}while(0);
return o_calloutMade;
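In the callout loop above, each DIMM is only tested for NVDIMM support when the build defines CONFIG_NVDIMM; otherwise the flag stays false and the legacy gard behavior is unchanged. A compilable sketch of that compile-time gating, using a placeholder macro and predicate rather than the real Hostboot config flag and PlatServices::isNVDIMM():

    // Standalone sketch of the CONFIG_NVDIMM-style gating in the callout loop.
    #include <cstdio>

    #define CONFIG_NVDIMM_MODEL 1

    static bool isNvdimmModel(int dimmId) { return dimmId == 1; }

    static void calloutLoopModel()
    {
        for (int dimmId = 0; dimmId < 2; ++dimmId)
        {
            bool nvdimmNoGard = false;
        #ifdef CONFIG_NVDIMM_MODEL
            // Only NVDIMMs get the no-gard treatment; everything else keeps
            // the default predictive gard policy.
            if (isNvdimmModel(dimmId)) nvdimmNoGard = true;
        #endif
            std::printf("dimm %d: nvdimmNoGard=%d\n", dimmId, nvdimmNoGard);
        }
    }

    int main() { calloutLoopModel(); return 0; }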
@@ -263,6 +287,14 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
#undef PRDF_FUNC
}
+
+template
+bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt,
+ uint8_t i_repairedRankMask );
+template
+bool processRepairedRanks<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt,
+ uint8_t i_repairedRankMask );
+
//------------------------------------------------------------------------------
template<>
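The hunk above replaces the TYPE_MCA-only specialization of processRepairedRanks with a single primary template plus explicit instantiations for TYPE_MCA and TYPE_OCMB_CHIP, so one definition serves both memory controller types. The snippet below is a self-contained illustration of that C++ idiom with placeholder names; it is not the Hostboot code itself.

    // One primary template definition plus explicit instantiations in the .C
    // file, instead of a full specialization per target type.
    #include <cstdint>
    #include <cstdio>

    enum TargetType { TYPE_MCA_X, TYPE_OCMB_CHIP_X };

    template<TargetType T>
    bool processRanksModel(uint8_t mask)
    {
        // Shared body: type-specific behavior hangs off other templates
        // (e.g. readChipMark<T>), so only one definition is needed.
        std::printf("processing mask 0x%02x for type %d\n",
                    static_cast<unsigned>(mask), static_cast<int>(T));
        return mask != 0;
    }

    // Explicit instantiations emit object code for both types so callers in
    // other translation units can link against them.
    template bool processRanksModel<TYPE_MCA_X>(uint8_t);
    template bool processRanksModel<TYPE_OCMB_CHIP_X>(uint8_t);

    int main()
    {
        return processRanksModel<TYPE_MCA_X>(0x05) ? 0 : 1;
    }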
@@ -368,7 +400,12 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
// Callout all DIMMs in the map.
for ( auto const & dimm : calloutList )
{
- __calloutDimm<TYPE_MBA>( errl, i_trgt, dimm.first );
+ bool nvdimmNoGard = false;
+ #ifdef CONFIG_NVDIMM
+ if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true;
+ #endif
+
+ __calloutDimm<TYPE_MBA>(errl, i_trgt, dimm.first, nvdimmNoGard);
}
o_calloutMade = true;
@@ -392,10 +429,7 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
template<TARGETING::TYPE T>
-bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask );
-
-template<>
-bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
+bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
{
#define PRDF_FUNC "[processBadDimms] "
@@ -421,29 +455,35 @@ bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
{
if ( NULL == errl )
{
- errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE,
- i_trgt, PRDFSIG_RdrRepairUnavail );
+ errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_trgt, PRDFSIG_RdrRepairUnavail );
}
- __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm );
+ __calloutDimm<T>( errl, i_trgt, dimm );
o_calloutMade = true;
}
}
// Commit the error log, if needed.
- commitErrl<TYPE_MCA>( errl, i_trgt );
+ commitErrl<T>( errl, i_trgt );
// Commit an additional error log indicating something failed in the
// analysis, if needed.
- commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, analysisErrors );
+ commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
+ PRDFSIG_RdrInternalFail, analysisErrors );
return o_calloutMade;
#undef PRDF_FUNC
}
+template
+bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask );
+template
+bool processBadDimms<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt,
+ uint8_t i_badDimmMask );
+
//------------------------------------------------------------------------------
template<>
@@ -580,6 +620,25 @@ void deployDramSpares<TYPE_MBA>( TargetHandle_t i_trgt,
}
}
+template<>
+void deployDramSpares<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt,
+ const std::vector<MemRank> & i_ranks )
+{
+ for ( auto & rank : i_ranks )
+ {
+ MemSymbol sym = MemSymbol::fromSymbol( i_trgt, rank, 71 );
+
+ int32_t l_rc = mssSetSteerMux<TYPE_OCMB_CHIP>(i_trgt, rank, sym, false);
+ if ( SUCCESS != l_rc )
+ {
+ // mssSetSteerMux() will print a trace and commit the error log,
+ // however, we need to handle the return code or we get a compile
+ // warning in Hostboot.
+ continue;
+ }
+ }
+}
+
} // end namespace RDR
//------------------------------------------------------------------------------
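The new deployDramSpares<TYPE_OCMB_CHIP> specialization above steers every rank to the fixed spare symbol and deliberately tolerates a failing return code, since mssSetSteerMux() already traces and commits its own error log. Below is a standalone model of that loop with placeholder names; only the overall shape matches the patch.

    // Steer a fixed spare symbol for each rank, consuming the return code so
    // a failure is tolerated without an unused-result warning.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static constexpr uint8_t SPARE_SYMBOL = 71; // symbol index used above

    static int32_t setSteerMuxModel(uint8_t rank, uint8_t symbol)
    {
        std::printf("steering rank %u to symbol %u\n",
                    static_cast<unsigned>(rank), static_cast<unsigned>(symbol));
        return 0; // SUCCESS
    }

    static void deploySparesModel(const std::vector<uint8_t>& ranks)
    {
        for (uint8_t rank : ranks)
        {
            int32_t rc = setSteerMuxModel(rank, SPARE_SYMBOL);
            if (0 != rc)
            {
                // The real mssSetSteerMux() reports its own error; here we
                // only need to acknowledge the failure and move on.
                continue;
            }
        }
    }

    int main()
    {
        deploySparesModel({0, 1});
        return 0;
    }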
@@ -680,6 +739,8 @@ template
uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt );
template
uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt );
+template
+uint32_t restoreDramRepairs<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt );
//------------------------------------------------------------------------------
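With restoreDramRepairs now also instantiated for TYPE_OCMB_CHIP, the newly templated helpers above (processRepairedRanks, processBadDimms, deployDramSpares) are pulled in for OCMB-attached memory as well. The sketch below models how a single explicit instantiation line enables the whole restore flow for a new target type; the names are simplified placeholders, not the actual PRDF signatures.

    // Standalone model: one entry-point template ties the helpers together,
    // and one explicit instantiation line enables it for a new memory type.
    #include <cstdint>
    #include <cstdio>

    enum MemType { MCA_X, MBA_X, OCMB_X };

    template<MemType T> bool processRepairedRanksModel(uint8_t mask)
    { return mask != 0; }

    template<MemType T> bool processBadDimmsModel(uint8_t mask)
    { return mask != 0; }

    template<MemType T>
    uint32_t restoreDramRepairsModel(uint8_t repairedRanks, uint8_t badDimms)
    {
        bool calloutMade = false;
        calloutMade |= processRepairedRanksModel<T>(repairedRanks);
        calloutMade |= processBadDimmsModel<T>(badDimms);
        return calloutMade ? 1 : 0;
    }

    // Adding this line is all that is needed to emit the flow for OCMB_X.
    template uint32_t restoreDramRepairsModel<OCMB_X>(uint8_t, uint8_t);

    int main()
    {
        std::printf("rc=%u\n", restoreDramRepairsModel<OCMB_X>(0x01, 0x00));
        return 0;
    }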