diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2018-04-17 09:18:37 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-05-04 22:26:25 -0400 |
commit | 3d5c1c541bae81970b97160272e76e3772a75864 (patch) | |
tree | cdb403e39d27e67a8842ca3f5234096466d82299 /src/usr/diag | |
parent | 453283ebfde2d76bd999af0edc1c5b16cf836773 (diff) | |
download | talos-hostboot-3d5c1c541bae81970b97160272e76e3772a75864.tar.gz talos-hostboot-3d5c1c541bae81970b97160272e76e3772a75864.zip |
PRD: Update restoreDramRepairs for Centaur
Change-Id: Iccd21642ce64e3f5d366170cb18837096012e6d0
RTC: 178743
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57511
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58326
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C | 298 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.C | 4 |
2 files changed, 129 insertions, 173 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C index 3ad9878bc..c32dd52a7 100644 --- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C +++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C @@ -282,7 +282,6 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, bool o_calloutMade = false; - /* TODO RTC 178743 bool analysisErrors = false; errlHndl_t errl = NULL; // Initially NULL, will create if needed. @@ -297,19 +296,30 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, continue; // this rank didn't have any repairs } - CenRank rank ( r ); - CenMark mark; + MemRank rank ( r ); + MemMark chipMark, symMark; - if ( SUCCESS != mssGetMarkStore(i_trgt, rank, mark) ) + ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip( i_trgt ); + + if (SUCCESS != MarkStore::readChipMark<TYPE_MBA>(chip, rank, chipMark)) { - PRDF_ERR( PRDF_FUNC "mssGetMarkStore() failed: MBA=0x%08x " - "rank=%d", getHuid(i_trgt), rank.getMaster() ); + PRDF_ERR( PRDF_FUNC "readChipMark() failed: MBA=0x%08x " + "rank=0x%02x", getHuid(i_trgt), rank.getKey() ); + analysisErrors = true; + continue; // skip this rank + } + + if (SUCCESS != MarkStore::readSymbolMark<TYPE_MBA>(chip, rank, symMark)) + { + PRDF_ERR( PRDF_FUNC "readSymbolMark() failed: MBA=0x%08x " + "rank=0x%02x", getHuid(i_trgt), rank.getKey() ); analysisErrors = true; continue; // skip this rank } - CenSymbol sp0, sp1, ecc; + MemSymbol sp0, sp1, ecc; + /* TODO RTC 189221 DRAM sparing if ( SUCCESS != mssGetSteerMux(i_trgt, rank, sp0, sp1, ecc) ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed: MBA=0x%08x " @@ -317,9 +327,10 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, analysisErrors = true; continue; // skip this rank } + */ - bool isCm = mark.getCM().isValid(); // chip mark - bool isSm = mark.getSM().isValid(); // symbol mark + bool isCm = chipMark.isValid(); // chip mark + bool isSm = symMark.isValid(); // symbol mark bool isSp = (sp0.isValid() || sp1.isValid()); // either DRAM spare bool isEcc = ecc.isValid(); // ECC spare @@ -332,33 +343,41 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, if ( NULL == errl ) { errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, - i_trgt, - PRDFSIG_RdrRepairsUsed ); + i_trgt, PRDFSIG_RdrRepairsUsed ); } - std::vector<CenSymbol> list; - list.push_back( mark.getCM() ); - list.push_back( mark.getSM() ); - list.push_back( sp0 ); - list.push_back( sp1 ); - list.push_back( ecc ); - - for ( std::vector<CenSymbol>::iterator it = list.begin(); - it != list.end(); it++ ) + // Keep a list of DIMMs to callout. Note that we are using a map + // with the DIMM target as the key so that we can maintain a + // unique list. The map value has no significance. + std::map<TargetHandle_t, uint32_t> calloutList; + std::vector<MemSymbol> symList; + symList.push_back( chipMark.getSymbol() ); + symList.push_back( symMark.getSymbol() ); + symList.push_back( sp0 ); + symList.push_back( sp1 ); + symList.push_back( ecc ); + + for ( auto & sym : symList ) { - if ( !it->isValid() ) continue; + if ( !sym.isValid() ) continue; + + MemoryMru mm( i_trgt, rank, sym ); // Add all parts to the error log. - TargetHandleList partList = i_memmru.getCalloutList(); - for ( auto &part : partList ) + for ( auto & dimm : mm.getCalloutList() ) { - errl->addHwCallout( part, MRU_HIGH, - HWAS::DELAYED_DECONFIG, - HWAS::GARD_Predictive ); + calloutList[dimm] = 1; } // Add the MemoryMru to the capture data. - MemCaptureData::addExtMemMruData( i_memmru, errl ); + MemCaptureData::addExtMemMruData( mm, errl ); + } + + // Callout all DIMMs in the map. + for ( auto const & dimm : calloutList ) + { + __calloutDimm<TYPE_MBA, DIMMS_PER_RANK::MBA>( errl, i_trgt, + dimm.first ); } o_calloutMade = true; @@ -366,13 +385,12 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, } // Commit the error log, if needed. - commitErrl( errl, i_trgt ); + commitErrl<TYPE_MBA>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); - */ + commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); return o_calloutMade; @@ -446,57 +464,41 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) // available repairs. Callout these DIMMs. bool o_calloutMade = false; - - /* TODO RTC 178743 bool analysisErrors = false; - errlHndl_t errl = NULL; // Initially NULL, will create if needed. + errlHndl_t errl = nullptr; // Initially NULL, will create if needed. // Iterate the list of all DIMMs be - TargetHandleList dimms = getConnected( i_mba, TYPE_DIMM ); - for ( TargetHandleList::iterator i = dimms.begin(); i < dimms.end(); i++ ) + TargetHandleList dimms = getConnected( i_trgt, TYPE_DIMM ); + for ( auto & dimm : dimms ) { - uint8_t port = 0, dimm = 0; - - if ( SUCCESS != getMbaPort(*i, port) ) - { - PRDF_ERR( PRDF_FUNC "getMbaPort() failed: DIMM=0x%08x", getHuid(*i)); - analysisErrors = true; - continue; // skip this dimm - } - - if ( SUCCESS != getMbaDimm(*i, dimm) ) - { - PRDF_ERR( PRDF_FUNC "getMbaDimm() failed: DIMM=0x%08x", getHuid(*i)); - analysisErrors = true; - continue; // skip this dimm - } + uint8_t portSlct = getDimmPort<TYPE_MBA>( dimm ); + uint8_t dimmSlct = getDimmSlct<TYPE_MBA>( dimm ); // The 4 bits of i_badDimmMask is defined as p0d0, p0d1, p1d0, and p1d1. - uint8_t mask = 0x8 >> (port * MBA_DIMMS_PER_RANK + dimm); + uint8_t mask = 0x8 >> (portSlct * MBA_DIMMS_PER_RANK + dimmSlct); if ( 0 != (i_badDimmMask & mask) ) { if ( NULL == errl ) { - errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, i_mba, - PRDFSIG_RdrRepairUnavail ); + errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, + i_trgt, PRDFSIG_RdrRepairUnavail ); } + __calloutDimm<TYPE_MBA, DIMMS_PER_RANK::MBA>( errl, i_trgt, dimm ); + o_calloutMade = true; - errl->addHwCallout( *i, MRU_HIGH, HWAS::DELAYED_DECONFIG, - HWAS::GARD_Predictive ); } } // Commit the error log, if needed. - commitErrl( errl, i_mba ); + commitErrl<TYPE_MBA>( errl, i_trgt ); // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba, - PRDFSIG_RdrInternalFail, analysisErrors ); - */ + commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); return o_calloutMade; @@ -505,7 +507,26 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) //------------------------------------------------------------------------------ -template<DIMMS_PER_RANK T> +template<TARGETING::TYPE> +int32_t __readBadDqBitmap( TargetHandle_t i_trgt, MemRank i_rank ); + +template<> +int32_t __readBadDqBitmap<TYPE_MCA>( TargetHandle_t i_trgt, MemRank i_rank ) +{ + MemDqBitmap<DIMMS_PER_RANK::MCA> bitmap; + return getBadDqBitmap<DIMMS_PER_RANK::MCA>( i_trgt, i_rank, bitmap ); +} + +template<> +int32_t __readBadDqBitmap<TYPE_MBA>( TargetHandle_t i_trgt, MemRank i_rank ) +{ + MemDqBitmap<DIMMS_PER_RANK::MBA> bitmap; + return getBadDqBitmap<DIMMS_PER_RANK::MBA>( i_trgt, i_rank, bitmap ); +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T> bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks ) { #define PRDF_FUNC "[screenBadDqs<T>] " @@ -522,10 +543,9 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks ) // flag is set. PRD will simply need to iterate through all the ranks // to ensure all DIMMs are screen and the procedure will do the rest. - MemDqBitmap<T> bitmap; - if ( SUCCESS != getBadDqBitmap<T>(i_trgt, rank, bitmap) ) + if ( SUCCESS != __readBadDqBitmap<T>(i_trgt, rank) ) { - PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed: TRGT=0x%08x " + PRDF_ERR( PRDF_FUNC "__readBadDqBitmap() failed: TRGT=0x%08x " "rank=0x%02x", getHuid(i_trgt), rank.getKey() ); analysisErrors = true; continue; // skip this rank @@ -534,16 +554,8 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks ) // Commit an additional error log indicating something failed in the // analysis, if needed. - if ( DIMMS_PER_RANK::MBA == T ) - { - commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); - } - else - { - commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, analysisErrors ); - } + commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); return o_calloutMade; @@ -552,33 +564,41 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks ) //------------------------------------------------------------------------------ -void deployDramSpares( TargetHandle_t i_mba, - const std::vector<CenRank> & i_ranks ) +template<TARGETING::TYPE> +void deployDramSpares( TargetHandle_t i_trgt, + const std::vector<MemRank> & i_ranks ); + +template<> +void deployDramSpares<TYPE_MCA>( TargetHandle_t i_trgt, + const std::vector<MemRank> & i_ranks ){} +template<> +void deployDramSpares<TYPE_MBA>( TargetHandle_t i_trgt, + const std::vector<MemRank> & i_ranks ) { - /* TODO RTC 178743 - bool x4 = isDramWidthX4(i_mba); + PRDF_TRAC( "deployDramSpares: Function not implemented yet" ); + /* TODO RTC 189221 + bool x4 = isDramWidthX4( i_trgt ); bool cenDimm = isMembufOnDimm<TYPE_MBA>( i_mba ); - for ( std::vector<CenRank>::const_iterator rank = i_ranks.begin(); - rank != i_ranks.end(); rank++ ) + for ( auto & rank : i_ranks ) { // Doesn't matter which DRAM is spared as long as they are all spared. // Also, make sure the ECC spare is on a different DRAM than the spare // DRAM. - CenSymbol symPort0 = CenSymbol::fromSymbol( i_mba, *rank, 71 ); - CenSymbol symPort1 = CenSymbol::fromSymbol( i_mba, *rank, 53 ); - CenSymbol symEccSp = CenSymbol::fromSymbol( i_mba, *rank, 67 ); + MemSymbol symPort0 = MemSymbol::fromSymbol( i_trgt, rank, 71 ); + MemSymbol symPort1 = MemSymbol::fromSymbol( i_trgt, rank, 53 ); + MemSymbol symEccSp = MemSymbol::fromSymbol( i_trgt, rank, 67 ); int32_t l_rc = SUCCESS; if ( cenDimm ) { - l_rc |= mssSetSteerMux( i_mba, *rank, symPort0, false ); - l_rc |= mssSetSteerMux( i_mba, *rank, symPort1, false ); + l_rc |= mssSetSteerMux( i_trgt, rank, symPort0, false ); + l_rc |= mssSetSteerMux( i_trgt, rank, symPort1, false ); } if ( x4 ) - l_rc |= mssSetSteerMux( i_mba, *rank, symEccSp, true ); + l_rc |= mssSetSteerMux( i_trgt, rank, symEccSp, true ); if ( SUCCESS != l_rc ) { @@ -587,8 +607,7 @@ void deployDramSpares( TargetHandle_t i_mba, // warning in Hostboot. continue; } - } - */ + }*/ } } // end namespace RDR @@ -598,12 +617,9 @@ void deployDramSpares( TargetHandle_t i_mba, //------------------------------------------------------------------------------ template<TARGETING::TYPE T> -uint32_t restoreDramRepairs( TargetHandle_t i_trgt ); - -template<> -uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ) +uint32_t restoreDramRepairs( TargetHandle_t i_trgt ) { - #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MCA>" + #define PRDF_FUNC "PRDF::restoreDramRepairs<T>" PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); @@ -622,94 +638,28 @@ uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ) if ( nullptr != errl ) { PRDF_ERR( PRDF_FUNC "Failed to initialize PRD" ); - RDR::commitErrl<TYPE_MCA>( errl, i_trgt ); + RDR::commitErrl<T>( errl, i_trgt ); break; } } std::vector<MemRank> ranks; - getMasterRanks<TYPE_MCA>( i_trgt, ranks ); - - if ( areDramRepairsDisabled() ) - { - // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with - // VPD information. - if ( RDR::screenBadDqs<DIMMS_PER_RANK::MCA>(i_trgt, ranks) ) - calloutMade = true; - - // No need to continue because there will not be anything to - // restore. - break; - } - - uint8_t rankMask = 0, dimmMask = 0; - if ( SUCCESS != mssRestoreDramRepairs<TYPE_MCA>(i_trgt, rankMask, - dimmMask) ) - { - // Can't check anything if this doesn't work. - PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" ); - break; - } - - // Callout DIMMs with too many bad bits and not enough repairs available - if ( RDR::processBadDimms<TYPE_MCA>(i_trgt, dimmMask) ) - calloutMade = true; - - // Check repaired ranks for RAS policy violations. - if ( RDR::processRepairedRanks<TYPE_MCA>(i_trgt, rankMask) ) - calloutMade = true; - - } while(0); - - PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); - - return calloutMade ? FAIL : SUCCESS; - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -template<> -uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ) -{ - #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MBA>" - - PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); - - // will unlock when going out of scope - PRDF_SYSTEM_SCOPELOCK; - - bool calloutMade = false; - - /* TODO RTC 178743 - do - { - std::vector<CenRank> ranks; - int32_t l_rc = getMasterRanks( i_trgt, ranks ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( "[" PRDF_FUNC "] getMasterRanks() failed" ); - - RDR::commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, - PRDFSIG_RdrInternalFail, true ); - - break; // Assume user meant to disable DRAM repairs. - } + getMasterRanks<T>( i_trgt, ranks ); bool spareDramDeploy = mnfgSpareDramDeploy(); if ( spareDramDeploy ) { // Deploy all spares for MNFG corner tests. - RDR::deployDramSpares( i_trgt, ranks ); + RDR::deployDramSpares<T>( i_trgt, ranks ); } if ( areDramRepairsDisabled() ) { // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with // VPD information. - if ( RDR::screenBadDqs(i_trgt, ranks) ) calloutMade = true; + if ( RDR::screenBadDqs<T>(i_trgt, ranks) ) + calloutMade = true; // No need to continue because there will not be anything to // restore. @@ -724,14 +674,15 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ) PRDF_ERR( "[" PRDF_FUNC "] MNFG spare deploy enabled, but DRAM " "repairs are not disabled" ); - RDR::commitSoftError( PRDF_INVALID_CONFIG, i_trgt, - PRDFSIG_RdrInvalidConfig, true ); + RDR::commitSoftError<T>( PRDF_INVALID_CONFIG, i_trgt, + PRDFSIG_RdrInvalidConfig, true ); break; // Assume user meant to disable DRAM repairs. } uint8_t rankMask = 0, dimmMask = 0; - if ( SUCCESS != mssRestoreDramRepairs(i_trgt, rankMask, dimmMask) ) + if ( SUCCESS != mssRestoreDramRepairs<T>( i_trgt, rankMask, + dimmMask) ) { // Can't check anything if this doesn't work. PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" ); @@ -739,13 +690,14 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ) } // Callout DIMMs with too many bad bits and not enough repairs available - if ( RDR::processBadDimms(i_trgt, dimmMask) ) calloutMade = true; + if ( RDR::processBadDimms<T>(i_trgt, dimmMask) ) + calloutMade = true; // Check repaired ranks for RAS policy violations. - if ( RDR::processRepairedRanks(i_trgt, rankMask) ) calloutMade = true; + if ( RDR::processRepairedRanks<T>(i_trgt, rankMask) ) + calloutMade = true; } while(0); - */ PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); @@ -754,6 +706,12 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ) #undef PRDF_FUNC } +template +uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ); +template +uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ); + +//------------------------------------------------------------------------------ } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C index 3982e17f5..d3c0729a9 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C @@ -188,11 +188,10 @@ uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target, { uint32_t o_rc = SUCCESS; - /* TODO RTC 178743 errlHndl_t errl = NULL; FAPI_INVOKE_HWP( errl, mss_restore_DRAM_repairs, - fapi::Target(fapi::TARGET_TYPE_MCA_CHIPLET, i_target), + fapi2::Target<fapi2::TARGET_TYPE_MBA>( i_target ), o_repairedRankMask, o_badDimmMask ); if ( NULL != errl ) @@ -203,7 +202,6 @@ uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target, PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; } - */ return o_rc; } |