diff options
Diffstat (limited to 'src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C | 147 |
1 files changed, 104 insertions, 43 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C index ef3a143eb..fc389000a 100644 --- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C +++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C @@ -99,7 +99,7 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt ) template<TARGETING::TYPE T> void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, - TargetHandle_t i_dimmTrgt ) + TargetHandle_t i_dimmTrgt, bool i_nvdimmNoGard = false ) { #define PRDF_FUNC "[RDR::__calloutDimm] " @@ -109,9 +109,31 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, PRDF_ASSERT( nullptr != i_dimmTrgt ); PRDF_ASSERT( TYPE_DIMM == getTargetType(i_dimmTrgt) ); - // Callout the DIMM. + HWAS::DeconfigEnum deconfigPolicy = HWAS::DELAYED_DECONFIG; + HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_Predictive; + + #ifdef CONFIG_NVDIMM + // For the "RDR: All repairs used" case, If the DIMM is an NVDIMM, change + // the gard and deconfig options to no gard/deconfig and call + // nvdimmNotifyProtChange to indicate a save/restore may work. + if ( i_nvdimmNoGard ) + { + deconfigPolicy = HWAS::NO_DECONFIG; + gardPolicy = HWAS::GARD_NULL; + + uint32_t l_rc = PlatServices::nvdimmNotifyProtChange( i_dimmTrgt, + NVDIMM::NVDIMM_RISKY_HW_ERROR ); + if ( SUCCESS != l_rc ) + { + PRDF_TRAC( PRDF_FUNC "nvdimmNotifyProtChange(0x%08x) " + "failed.", PlatServices::getHuid(i_dimmTrgt) ); + } + } + #endif + io_errl->addHwCallout( i_dimmTrgt, HWAS::SRCI_PRIORITY_HIGH, - HWAS::DELAYED_DECONFIG, HWAS::GARD_Predictive ); + deconfigPolicy, gardPolicy ); + // Clear the VPD on this DIMM. The DIMM has been garded, but it is possible // the customer will want to ungard the DIMM. Without clearing the VPD, the @@ -120,16 +142,20 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt, // customer takes the risk of ungarding the DIMM (that they should replace), // the repairs will need to be rediscovered. - std::vector<MemRank> ranks; - getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) ); - - for ( auto & rank : ranks ) + // Do not clear the VPD if we had an NVDIMM that we avoided garding. + if ( !i_nvdimmNoGard ) { - if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) ) + std::vector<MemRank> ranks; + getMasterRanks<T>( i_portTrgt, ranks, getDimmSlct(i_dimmTrgt) ); + + for ( auto & rank : ranks ) { - PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed", - getHuid(i_portTrgt), rank.getKey() ); - continue; + if ( SUCCESS != clearBadDqBitmap(i_portTrgt, rank) ) + { + PRDF_ERR( PRDF_FUNC "clearBadDqBitmap(0x%08x,0x%02x) failed", + getHuid(i_portTrgt), rank.getKey() ); + continue; + } } } @@ -156,11 +182,7 @@ void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask ); - -template<> -bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, - uint8_t i_repairedRankMask ) +bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask ) { #define PRDF_FUNC "[processRepairedRanks] " @@ -179,7 +201,7 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, // map value has no significance. std::map<TargetHandle_t, uint32_t> calloutList; - ExtensibleChip * mcaChip = (ExtensibleChip *)systemPtr->GetChip(i_trgt); + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip(i_trgt); for ( uint8_t r = 0; r < MASTER_RANKS_PER_PORT; ++r ) { @@ -191,20 +213,18 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, MemRank rank ( r ); MemMark cm; - if ( SUCCESS != MarkStore::readChipMark<TYPE_MCA>( mcaChip, rank, - cm ) ) + if ( SUCCESS != MarkStore::readChipMark<T>( chip, rank, cm ) ) { - PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x,0x%02x) " - "failed", mcaChip->getHuid(), rank.getKey() ); + PRDF_ERR( PRDF_FUNC "readChipMark<T>(0x%08x,0x%02x) " + "failed", chip->getHuid(), rank.getKey() ); continue; // skip this rank } MemMark sm; - if ( SUCCESS != MarkStore::readSymbolMark<TYPE_MCA>( mcaChip, rank, - sm ) ) + if ( SUCCESS != MarkStore::readSymbolMark<T>( chip, rank, sm ) ) { - PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x,0x%02x) " - "failed", mcaChip->getHuid(), rank.getKey() ); + PRDF_ERR( PRDF_FUNC "readSymbolMark<T>(0x%08x,0x%02x) " + "failed", chip->getHuid(), rank.getKey() ); continue; // skip this rank } @@ -214,9 +234,8 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, if ( NULL == errl ) { - errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, - i_trgt, - PRDFSIG_RdrRepairsUsed ); + errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairsUsed ); } std::vector<MemSymbol> symList; @@ -246,16 +265,21 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, // Callout all DIMMs in the map. for ( auto const & dimm : calloutList ) { - __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm.first ); + bool nvdimmNoGard = false; + #ifdef CONFIG_NVDIMM + if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true; + #endif + + __calloutDimm<T>( errl, i_trgt, dimm.first, nvdimmNoGard ); } // Commit the error log, if needed. - commitErrl<TYPE_MCA>( errl, i_trgt ); + commitErrl<T>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); + commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); }while(0); return o_calloutMade; @@ -263,6 +287,14 @@ bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, #undef PRDF_FUNC } + +template +bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ); +template +bool processRepairedRanks<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ); + //------------------------------------------------------------------------------ template<> @@ -368,7 +400,12 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, // Callout all DIMMs in the map. for ( auto const & dimm : calloutList ) { - __calloutDimm<TYPE_MBA>( errl, i_trgt, dimm.first ); + bool nvdimmNoGard = false; + #ifdef CONFIG_NVDIMM + if ( isNVDIMM(dimm.first) ) nvdimmNoGard = true; + #endif + + __calloutDimm<TYPE_MBA>(errl, i_trgt, dimm.first, nvdimmNoGard); } o_calloutMade = true; @@ -392,10 +429,7 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, template<TARGETING::TYPE T> -bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask ); - -template<> -bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) +bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { #define PRDF_FUNC "[processBadDimms] " @@ -421,29 +455,35 @@ bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { if ( NULL == errl ) { - errl = createErrl<TYPE_MCA>( PRDF_DETECTED_FAIL_HARDWARE, - i_trgt, PRDFSIG_RdrRepairUnavail ); + errl = createErrl<T>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairUnavail ); } - __calloutDimm<TYPE_MCA>( errl, i_trgt, dimm ); + __calloutDimm<T>( errl, i_trgt, dimm ); o_calloutMade = true; } } // Commit the error log, if needed. - commitErrl<TYPE_MCA>( errl, i_trgt ); + commitErrl<T>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); + commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); return o_calloutMade; #undef PRDF_FUNC } +template +bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ); +template +bool processBadDimms<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + uint8_t i_badDimmMask ); + //------------------------------------------------------------------------------ template<> @@ -580,6 +620,25 @@ void deployDramSpares<TYPE_MBA>( TargetHandle_t i_trgt, } } +template<> +void deployDramSpares<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt, + const std::vector<MemRank> & i_ranks ) +{ + for ( auto & rank : i_ranks ) + { + MemSymbol sym = MemSymbol::fromSymbol( i_trgt, rank, 71 ); + + int32_t l_rc = mssSetSteerMux<TYPE_OCMB_CHIP>(i_trgt, rank, sym, false); + if ( SUCCESS != l_rc ) + { + // mssSetSteerMux() will print a trace and commit the error log, + // however, we need to handle the return code or we get a compile + // warning in Hostboot. + continue; + } + } +} + } // end namespace RDR //------------------------------------------------------------------------------ @@ -680,6 +739,8 @@ template uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ); template uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ); +template +uint32_t restoreDramRepairs<TYPE_OCMB_CHIP>( TargetHandle_t i_trgt ); //------------------------------------------------------------------------------ |