diff options
-rw-r--r-- | src/include/usr/diag/prdf/prdfMain_ipl.H | 13 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H | 19 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C (renamed from src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C) | 356 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdf_plat_mem_hb_only.mk | 1 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.C | 52 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.H | 11 |
6 files changed, 386 insertions, 66 deletions
diff --git a/src/include/usr/diag/prdf/prdfMain_ipl.H b/src/include/usr/diag/prdf/prdfMain_ipl.H index 96971a33d..933a3d47f 100644 --- a/src/include/usr/diag/prdf/prdfMain_ipl.H +++ b/src/include/usr/diag/prdf/prdfMain_ipl.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2015 */ +/* Contributors Listed Below - COPYRIGHT 2014,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -41,12 +41,19 @@ namespace PRDF { /** + * @brief Initialize PRD system model and data (No System Lock) + * @return Error log if error occurs + */ +extern errlHndl_t noLock_initialize(); + +/** * @brief Restores hardware DRAM repairs to reflect what is stored in VPD. - * @param i_mba An MBA target. + * @param i_trgt An MBA or MCA target. * @return Non-SUCCESS if conditions are such that a callout had to be made, * SUCCESS otherwise. */ -extern int32_t restoreDramRepairs( const TARGETING::TargetHandle_t i_mba ); +template<TARGETING::TYPE T> +extern uint32_t restoreDramRepairs( const TARGETING::TargetHandle_t i_trgt ); /** * @brief Analyzes IPL CE statistics during MNFG IPL diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H index 9ea48f0f2..aa90e83d7 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H @@ -49,6 +49,20 @@ PRDR_ERROR_SIGNATURE(VcmFalseAlarm, 0xffff0021, "", "VCM: false alarm"); PRDR_ERROR_SIGNATURE(VcmFalseAlarmTH, 0xffff0022, "", "VCM: false alarm threshold"); PRDR_ERROR_SIGNATURE(AllDramRepairs, 0xffff002F, "", "all DRAM repairs used"); +PRDR_ERROR_SIGNATURE(RdrInternalFail, 0xffff0040, "", "RDR: Internal failure"); +PRDR_ERROR_SIGNATURE(RdrInvalidConfig, 0xffff0041, "", "RDR: Invalid config"); +PRDR_ERROR_SIGNATURE(RdrScreenBadDqs, 0xffff0042, "", + "RDR: DRAM repairs disabled and VPD found"); +PRDR_ERROR_SIGNATURE(RdrRepairsUsed, 0xffff0043, "", + "RDR: All repairs used"); +PRDR_ERROR_SIGNATURE(RdrRepairUnavail, 0xffff0044, "", + "RDR: Repairs needed but unavailable"); + +PRDR_ERROR_SIGNATURE(MnfgIplHardCE, 0xffff0051, "", "MNFG IPL hard CE"); +PRDR_ERROR_SIGNATURE(MnfgIplDramCTE, 0xffff0052, "", "MNFG IPL DRAM CTE"); +PRDR_ERROR_SIGNATURE(MnfgIplRankCTE, 0xffff0053, "", "MNFG IPL rank CTE"); +PRDR_ERROR_SIGNATURE(MnfgIplDsCTE, 0xffff0054, "", "MNFG IPL DIMM CTE"); + PRDR_ERROR_SIGNATURE(TpsFalseAlarm, 0xffff0061, "", "TPS: false alarm"); PRDR_ERROR_SIGNATURE(TpsFalseAlarmTH, 0xffff0062, "", "TPS: false alarm threshold"); PRDR_ERROR_SIGNATURE(TpsSymbolMark, 0xffff0063, "", "TPS: symbol mark placed"); @@ -58,11 +72,6 @@ PRDR_ERROR_SIGNATURE(TpsChipUeRisk, 0xffff0066, "", "TPS: placing chip mark ri PRDR_ERROR_SIGNATURE(TpsPotentialUe, 0xffff0067, "", "TPS: potential UE"); PRDR_ERROR_SIGNATURE(TpsDramDisabled, 0xffff0068, "", "TPS: DRAM repairs disabled"); -PRDR_ERROR_SIGNATURE(MnfgIplHardCE, 0xffff0051, "", "MNFG IPL hard CE"); -PRDR_ERROR_SIGNATURE(MnfgIplDramCTE, 0xffff0052, "", "MNFG IPL DRAM CTE"); -PRDR_ERROR_SIGNATURE(MnfgIplRankCTE, 0xffff0053, "", "MNFG IPL rank CTE"); -PRDR_ERROR_SIGNATURE(MnfgIplDsCTE, 0xffff0054, "", "MNFG IPL DIMM CTE"); - PRDR_ERROR_SIGNATURE(MnfgDramCte, 0xffff0070, "", "MNFG per DRAM CTE"); PRDR_ERROR_SIGNATURE(MnfgRankCte, 0xffff0071, "", "MNFG per rank CTE"); PRDR_ERROR_SIGNATURE(MnfgDimmCte, 0xffff0072, "", "MNFG per DIMM CTE"); diff --git a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C index 4d337f4ad..5e7c6bffb 100644 --- a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C +++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C @@ -1,11 +1,11 @@ /* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ -/* $Source: src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C $ */ +/* $Source: src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2013,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -26,20 +26,20 @@ /** @file prdfDramRepairs.C */ #include <diag/prdf/prdfMain.H> -#include <diag/prdf/common/prdf_service_codes.H> +#include <prdf_service_codes.H> #include "common/iipconst.h" +#include <iipSystem.h> #include <prdfGlobal.H> #include <prdfTrace.H> #include <prdfErrlUtil.H> #include "common/prdfEnums.H" -#include "common/plat/pegasus/prdfCenMbaCaptureData.H" -#include "common/plat/pegasus/prdfCalloutUtil.H" -#include "common/plat/pegasus/prdfCenDqBitmap.H" -#include "common/plat/pegasus/prdfCenMarkstore.H" -#include "common/plat/pegasus/prdfCenMbaExtraSig.H" -#include "common/plat/pegasus/prdfCenSymbol.H" -#include "common/plat/pegasus/prdfMemoryMru.H" -#include "framework/service/prdfPlatServices.H" +#include "common/plat/mem/prdfMemCaptureData.H" +#include "common/plat/mem/prdfMemDqBitmap.H" +#include "common/plat/mem/prdfMemMark.H" +#include "common/plat/mem/prdfP9McaExtraSig.H" +#include "common/plat/mem/prdfMemSymbol.H" +#include "common/plat/mem/prdfMemoryMru.H" +#include <prdfPlatServices.H> using namespace HWAS; using namespace std; @@ -54,10 +54,10 @@ namespace RDR // local utility functions to support PRDF::restoreDramRepairs() { // Creates and returns an error log. -errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_mba, +errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_trgt, uint32_t i_signature ) { - uint64_t userdata12 = PRDF_GET_UINT64_FROM_UINT32( getHuid(i_mba), 0 ); + uint64_t userdata12 = PRDF_GET_UINT64_FROM_UINT32( getHuid(i_trgt), 0 ); uint64_t userdata34 = PRDF_GET_UINT64_FROM_UINT32( i_signature, 0 ); // Note that the error log tags are not needed because PRD uses its own @@ -75,12 +75,13 @@ errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_mba, // If an error log is given, will add DRAM repairs FFDC and traces to error log, // then commit the error log. -void commitErrl( errlHndl_t i_errl, TargetHandle_t i_mba ) +template<TARGETING::TYPE T> +void commitErrl( errlHndl_t i_errl, TargetHandle_t i_trgt ) { if ( NULL != i_errl ) { // Add capture data - CenMbaCaptureData::addEccData( i_mba, i_errl ); + MemCaptureData::addEccData<T>( i_trgt, i_errl ); // Add traces i_errl->collectTrace( PRDF_COMP_NAME, 512 ); @@ -94,20 +95,134 @@ void commitErrl( errlHndl_t i_errl, TargetHandle_t i_mba ) // If there were analysis errors, will create and commit an error log with 2nd // level support callout. -void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_mba, +template<TARGETING::TYPE T> +void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_trgt, uint32_t i_signature, bool i_analysisErrors ) { if ( i_analysisErrors ) { - errlHndl_t errl = createErrl( i_reasonCode, i_mba, i_signature ); - errl->addProcedureCallout( LEVEL2_SUPPORT, MRU_HIGH); - commitErrl( errl, i_mba ); + errlHndl_t errl = createErrl( i_reasonCode, i_trgt, i_signature ); + errl->addProcedureCallout( HWAS::EPUB_PRC_LVL_SUPP, + HWAS::SRCI_PRIORITY_HIGH ); + commitErrl<T>( errl, i_trgt ); } } //------------------------------------------------------------------------------ -bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask ) +template<TARGETING::TYPE T> +bool processRepairedRanks( TargetHandle_t i_trgt, uint8_t i_repairedRankMask ); + +template<> +bool processRepairedRanks<TYPE_MCA>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ) +{ + #define PRDF_FUNC "[processRepairedRanks] " + + // The bits in i_repairedRankMask represent ranks that have repairs. Query + // hardware and compare against RAS policies. + + bool o_calloutMade = false; + bool analysisErrors = false; + + errlHndl_t errl = NULL; // Initially NULL, will create if needed. + + do + { + if ( (false == g_initialized) || (nullptr == systemPtr) ) + { + errl = noLock_initialize(); + if ( nullptr != errl ) + { + PRDF_ERR( PRDF_FUNC "Failed to initialize PRD" ); + break; + } + } + + ExtensibleChip * mcaChip = (ExtensibleChip *)systemPtr->GetChip(i_trgt); + + for ( uint8_t r = 0; r < MASTER_RANKS_PER_PORT; ++r ) + { + if ( 0 == (i_repairedRankMask & (0x80 >> r)) ) + { + continue; // this rank didn't have any repairs + } + + MemRank rank ( r ); + + MemMark cm; + if ( SUCCESS != MarkStore::readChipMark<TYPE_MCA>( mcaChip, rank, + cm ) ) + { + PRDF_ERR( PRDF_FUNC "readChipMark<TYPE_MCA>(0x%08x,0x%02x) " + "failed", mcaChip->getHuid(), rank.getKey() ); + continue; // skip this rank + } + + MemMark sm; + if ( SUCCESS != MarkStore::readSymbolMark<TYPE_MCA>( mcaChip, rank, + sm ) ) + { + PRDF_ERR( PRDF_FUNC "readSymbolMark<TYPE_MCA>(0x%08x,0x%02x) " + "failed", mcaChip->getHuid(), rank.getKey() ); + continue; // skip this rank + } + + if ( cm.isValid() && sm.isValid() ) // CM and SM used + { + // All repairs on the rank have been used. Callout all repairs. + + if ( NULL == errl ) + { + errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt, + PRDFSIG_RdrRepairsUsed ); + } + + std::vector<MemSymbol> symList; + symList.push_back( cm.getSymbol() ); + symList.push_back( sm.getSymbol() ); + + for ( auto & sym : symList ) + { + if ( !sym.isValid() ) continue; + + MemoryMru mm( i_trgt, rank, sym ); + + // Add all parts to the error log. + for ( auto & part : mm.getCalloutList() ) + { + errl->addHwCallout( part, HWAS::SRCI_PRIORITY_HIGH, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_Predictive ); + } + + // Add the MemoryMru to the capture data. + MemCaptureData::addExtMemMruData( mm, errl ); + } + + o_calloutMade = true; + } + } + + // Commit the error log, if needed. + commitErrl<TYPE_MCA>( errl, i_trgt ); + + // Commit an additional error log indicating something failed in the + // analysis, if needed. + commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); + }while(0); + + return o_calloutMade; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt, + uint8_t i_repairedRankMask ) { #define PRDF_FUNC "[processRepairedRanks] " @@ -115,6 +230,8 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask ) // hardware and compare against RAS policies. bool o_calloutMade = false; + + /* TODO RTC 178743 bool analysisErrors = false; errlHndl_t errl = NULL; // Initially NULL, will create if needed. @@ -212,6 +329,7 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask ) // analysis, if needed. commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba, PRDFSIG_RdrInternalFail, analysisErrors ); + */ return o_calloutMade; @@ -220,7 +338,12 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask ) //------------------------------------------------------------------------------ -bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask ) + +template<TARGETING::TYPE T> +bool processBadDimms( TargetHandle_t i_trgt, uint8_t i_badDimmMask ); + +template<> +bool processBadDimms<TYPE_MCA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) { #define PRDF_FUNC "[processBadDimms] " @@ -232,6 +355,61 @@ bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask ) errlHndl_t errl = NULL; // Initially NULL, will create if needed. + // Iterate the list of all DIMMs + TargetHandleList dimms = getConnected( i_trgt, TYPE_DIMM ); + for ( auto & i : dimms ) + { + uint8_t dimm = getTargetPosition( i ) % MAX_DIMM_PER_PORT; + + // i_badDimmMask is defined as a 2-bit mask where a bit set means that + // DIMM had more bad bits than could be repaired. + uint8_t mask = 0x2 >> dimm; + + if ( 0 != (i_badDimmMask & mask) ) + { + if ( NULL == errl ) + { + errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_trgt, + PRDFSIG_RdrRepairUnavail ); + } + + o_calloutMade = true; + errl->addHwCallout( i, HWAS::SRCI_PRIORITY_HIGH, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_Predictive ); + } + } + + // Commit the error log, if needed. + commitErrl<TYPE_MCA>( errl, i_trgt ); + + // Commit an additional error log indicating something failed in the + // analysis, if needed. + commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); + + return o_calloutMade; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask ) +{ + #define PRDF_FUNC "[processBadDimms] " + + // The bits in i_badDimmMask represent DIMMs that have exceeded the + // available repairs. Callout these DIMMs. + + bool o_calloutMade = false; + + /* TODO RTC 178743 + bool analysisErrors = false; + + errlHndl_t errl = NULL; // Initially NULL, will create if needed. + // Iterate the list of all DIMMs be TargetHandleList dimms = getConnected( i_mba, TYPE_DIMM ); for ( TargetHandleList::iterator i = dimms.begin(); i < dimms.end(); i++ ) @@ -276,6 +454,7 @@ bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask ) // analysis, if needed. commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba, PRDFSIG_RdrInternalFail, analysisErrors ); + */ return o_calloutMade; @@ -284,28 +463,28 @@ bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask ) //------------------------------------------------------------------------------ -bool screenBadDqs( TargetHandle_t i_mba, const std::vector<CenRank> & i_ranks ) +template<DIMMS_PER_RANK T> +bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks ) { - #define PRDF_FUNC "[screenBadDqs] " + #define PRDF_FUNC "[screenBadDqs<T>] " // Callout any attached DIMMs that have any bad DQs. bool o_calloutMade = false; bool analysisErrors = false; - for ( std::vector<CenRank>::const_iterator rank = i_ranks.begin(); - rank != i_ranks.end(); rank++ ) + for ( auto & rank : i_ranks ) { // The HW procedure to read the bad DQ attribute will callout the DIMM // if it has DRAM Repairs VPD and the DISABLE_DRAM_REPAIRS MNFG policy // flag is set. PRD will simply need to iterate through all the ranks // to ensure all DIMMs are screen and the procedure will do the rest. - CenDqBitmap bitmap; - if ( SUCCESS != getBadDqBitmap(i_mba, *rank, bitmap, true) ) + MemDqBitmap<T> bitmap; + if ( SUCCESS != getBadDqBitmap<T>(i_trgt, rank, bitmap) ) { - PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed: MBA=0x%08x rank=%d", - getHuid(i_mba), rank->getMaster() ); + PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed: TRGT=0x%08x " + "rank=0x%02x", getHuid(i_trgt), rank.getKey() ); analysisErrors = true; continue; // skip this rank } @@ -313,8 +492,16 @@ bool screenBadDqs( TargetHandle_t i_mba, const std::vector<CenRank> & i_ranks ) // Commit an additional error log indicating something failed in the // analysis, if needed. - commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba, - PRDFSIG_RdrInternalFail, analysisErrors ); + if ( DIMMS_PER_RANK::MBA == T ) + { + commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); + } + else + { + commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, + PRDFSIG_RdrInternalFail, analysisErrors ); + } return o_calloutMade; @@ -326,6 +513,7 @@ bool screenBadDqs( TargetHandle_t i_mba, const std::vector<CenRank> & i_ranks ) void deployDramSpares( TargetHandle_t i_mba, const std::vector<CenRank> & i_ranks ) { + /* TODO RTC 178743 bool x4 = isDramWidthX4(i_mba); bool cenDimm = false; @@ -364,6 +552,7 @@ void deployDramSpares( TargetHandle_t i_mba, continue; } } + */ } } // end namespace RDR @@ -372,11 +561,15 @@ void deployDramSpares( TargetHandle_t i_mba, // External functions - declared in prdfMain.H //------------------------------------------------------------------------------ -int32_t restoreDramRepairs( TargetHandle_t i_mba ) +template<TARGETING::TYPE T> +uint32_t restoreDramRepairs( TargetHandle_t i_trgt ); + +template<> +uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt ) { - #define PRDF_FUNC "PRDF::restoreDramRepairs" + #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MCA>" - PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_mba) ); + PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); // will unlock when going out of scope PRDF_SYSTEM_SCOPELOCK; @@ -385,13 +578,88 @@ int32_t restoreDramRepairs( TargetHandle_t i_mba ) do { + std::vector<MemRank> ranks; + getMasterRanks<TYPE_MCA>( i_trgt, ranks ); + + if ( areDramRepairsDisabled() ) + { + // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with + // VPD information. + if ( RDR::screenBadDqs<DIMMS_PER_RANK::MCA>(i_trgt, ranks) ) + calloutMade = true; + + // No need to continue because there will not be anything to + // restore. + break; + } + + uint8_t rankMask = 0, dimmMask = 0; + if ( SUCCESS != mssRestoreDramRepairs<TYPE_MCA>(i_trgt, rankMask, + dimmMask) ) + { + // Can't check anything if this doesn't work. + PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" ); + break; + } + + // Callout DIMMs with too many bad bits and not enough repairs available + if ( RDR::processBadDimms<TYPE_MCA>(i_trgt, dimmMask) ) + { + // Clear VPD after callout of ISDIMMs + uint8_t data[DIMMS_PER_RANK::MCA][DQ_BITMAP::BITMAP_SIZE]; + memset( data, 0x00, sizeof(data) ); + for ( auto & rank : ranks ) + { + MemDqBitmap<DIMMS_PER_RANK::MCA> dqBitmap( i_trgt, rank, data ); + if ( SUCCESS != setBadDqBitmap<DIMMS_PER_RANK::MCA>( i_trgt, + rank, dqBitmap ) ) + { + PRDF_ERR( PRDF_FUNC "setBadDqBitmap<DIMMS_PER_RANK::MCA>" + "(0x%08x,0x%02x) failed.", getHuid(i_trgt), + rank.getKey() ); + continue; + } + } + calloutMade = true; + } + + // Check repaired ranks for RAS policy violations. + if ( RDR::processRepairedRanks<TYPE_MCA>(i_trgt, rankMask) ) + calloutMade = true; + + } while(0); + + PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); + + return calloutMade ? FAIL : SUCCESS; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt ) +{ + #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MBA>" + + PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); + + // will unlock when going out of scope + PRDF_SYSTEM_SCOPELOCK; + + bool calloutMade = false; + + /* TODO RTC 178743 + do + { std::vector<CenRank> ranks; - int32_t l_rc = getMasterRanks( i_mba, ranks ); + int32_t l_rc = getMasterRanks( i_trgt, ranks ); if ( SUCCESS != l_rc ) { PRDF_ERR( "[" PRDF_FUNC "] getMasterRanks() failed" ); - RDR::commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba, + RDR::commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt, PRDFSIG_RdrInternalFail, true ); break; // Assume user meant to disable DRAM repairs. @@ -402,14 +670,14 @@ int32_t restoreDramRepairs( TargetHandle_t i_mba ) if ( spareDramDeploy ) { // Deploy all spares for MNFG corner tests. - RDR::deployDramSpares( i_mba, ranks ); + RDR::deployDramSpares( i_trgt, ranks ); } if ( areDramRepairsDisabled() ) { // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with // VPD information. - if ( RDR::screenBadDqs(i_mba, ranks) ) calloutMade = true; + if ( RDR::screenBadDqs(i_trgt, ranks) ) calloutMade = true; // No need to continue because there will not be anything to // restore. @@ -424,14 +692,14 @@ int32_t restoreDramRepairs( TargetHandle_t i_mba ) PRDF_ERR( "[" PRDF_FUNC "] MNFG spare deploy enabled, but DRAM " "repairs are not disabled" ); - RDR::commitSoftError( PRDF_INVALID_CONFIG, i_mba, + RDR::commitSoftError( PRDF_INVALID_CONFIG, i_trgt, PRDFSIG_RdrInvalidConfig, true ); break; // Assume user meant to disable DRAM repairs. } uint8_t rankMask = 0, dimmMask = 0; - if ( SUCCESS != mssRestoreDramRepairs(i_mba, rankMask, dimmMask) ) + if ( SUCCESS != mssRestoreDramRepairs(i_trgt, rankMask, dimmMask) ) { // Can't check anything if this doesn't work. PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" ); @@ -439,19 +707,21 @@ int32_t restoreDramRepairs( TargetHandle_t i_mba ) } // Callout DIMMs with too many bad bits and not enough repairs available - if ( RDR::processBadDimms(i_mba, dimmMask) ) calloutMade = true; + if ( RDR::processBadDimms(i_trgt, dimmMask) ) calloutMade = true; // Check repaired ranks for RAS policy violations. - if ( RDR::processRepairedRanks(i_mba, rankMask) ) calloutMade = true; + if ( RDR::processRepairedRanks(i_trgt, rankMask) ) calloutMade = true; } while(0); + */ - PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_mba) ); + PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) ); return calloutMade ? FAIL : SUCCESS; #undef PRDF_FUNC } + } // end namespace PRDF diff --git a/src/usr/diag/prdf/plat/mem/prdf_plat_mem_hb_only.mk b/src/usr/diag/prdf/plat/mem/prdf_plat_mem_hb_only.mk index cfc4b0056..a89ec90f6 100644 --- a/src/usr/diag/prdf/plat/mem/prdf_plat_mem_hb_only.mk +++ b/src/usr/diag/prdf/plat/mem/prdf_plat_mem_hb_only.mk @@ -58,6 +58,7 @@ prd_obj += prdfMemTps_ipl.o prd_obj += prdfMemVcm_ipl.o prd_obj += prdfP9McbistDomain.o prd_obj += prdfMemIplCeStats.o +prd_obj += prdfRestoreDramRepairs.o endif diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C index 4f8747a8c..843c4875a 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C @@ -37,7 +37,7 @@ #include <prdfErrlUtil.H> #include <prdfTrace.H> -//#include <prdfCenDqBitmap.H> TODO RTC 164707 +#include <prdfMemDqBitmap.H> #include <prdfMemScrubUtils.H> #include <prdfMfgThresholdMgr.H> @@ -154,31 +154,65 @@ bool rcdParityErrorReconfigLoop( TargetHandle_t i_trgt ) //------------------------------------------------------------------------------ -/* TODO RTC 164707 -int32_t mssRestoreDramRepairs( TargetHandle_t i_mbaTarget, - uint8_t & o_repairedRankMask, - uint8_t & o_badDimmMask ) +template<> +uint32_t mssRestoreDramRepairs<TYPE_MCA>( TargetHandle_t i_target, + uint8_t & o_repairedRankMask, + uint8_t & o_badDimmMask ) { - int32_t o_rc = SUCCESS; + uint32_t o_rc = SUCCESS; + + errlHndl_t errl = NULL; + + + fapi2::buffer<uint8_t> tmpRepairedRankMask, tmpBadDimmMask; + FAPI_INVOKE_HWP( errl, mss::restore_repairs, + fapi2::Target<fapi2::TARGET_TYPE_MCA>( i_target ), + tmpRepairedRankMask, tmpBadDimmMask ); + + if ( NULL != errl ) + { + PRDF_ERR( "[PlatServices::mssRestoreDramRepairs] " + "restore_repairs() failed. HUID: 0x%08x", + getHuid(i_target) ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; + } + + o_repairedRankMask = (uint8_t)tmpRepairedRankMask; + o_badDimmMask = (uint8_t)tmpBadDimmMask; + + return o_rc; +} + +//------------------------------------------------------------------------------ +template<> +uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target, + uint8_t & o_repairedRankMask, + uint8_t & o_badDimmMask ) +{ + uint32_t o_rc = SUCCESS; + + /* TODO RTC 178743 errlHndl_t errl = NULL; FAPI_INVOKE_HWP( errl, mss_restore_DRAM_repairs, - fapi::Target(fapi::TARGET_TYPE_MBA_CHIPLET, i_mbaTarget), + fapi::Target(fapi::TARGET_TYPE_MCA_CHIPLET, i_target), o_repairedRankMask, o_badDimmMask ); if ( NULL != errl ) { PRDF_ERR( "[PlatServices::mssRestoreDramRepairs] " "mss_restore_dram_repairs() failed. HUID: 0x%08x", - getHuid(i_mbaTarget) ); + getHuid(i_target) ); PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); o_rc = FAIL; } + */ return o_rc; } -*/ + //------------------------------------------------------------------------------ diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H index 2dcdc628c..f6dd933d0 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H @@ -73,16 +73,15 @@ bool rcdParityErrorReconfigLoop( TARGETING::TargetHandle_t i_trgt ); /** * @brief Invokes the restore DRAM repairs hardware procedure. - * @param i_mbaTarget + * @param i_target * @param o_repairedRankMask An encoded bitmask of repaired ranks. * @param o_badDimm An encoded bitmask of bad DIMMs. * @return Non-SUCCESS in internal function fails, SUCCESS otherwise. */ -/* TODO RTC 164707 -int32_t mssRestoreDramRepairs( TARGETING::TargetHandle_t i_mbaTarget, - uint8_t & o_repairedRankMask, - uint8_t & o_badDimmMask ); -*/ +template<TARGETING::TYPE T> +uint32_t mssRestoreDramRepairs( TARGETING::TargetHandle_t i_target, + uint8_t & o_repairedRankMask, + uint8_t & o_badDimmMask ); /** * @brief Invokes the mss_IPL_UE_isolation hardware procedure. |