From a5a132a1c27b5fae0ad5a3c5aaa25d7f4cecd701 Mon Sep 17 00:00:00 2001 From: Zane Shelley Date: Tue, 4 Mar 2014 17:42:07 -0600 Subject: PRD: Added callout support for ECC errors on spare DRAMs Change-Id: Ibdacb838430469463d4f68d7c5965b3b0356120c Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/9301 Tested-by: Jenkins Server Reviewed-by: Christopher T. Phan Reviewed-by: Bilicon Patil Reviewed-by: Sachin Gupta Reviewed-by: BENJAMIN J. WEISENBECK Reviewed-by: A. Patrick Williams III Reviewed-by: Zane Shelley Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/9473 --- .../framework/service/prdfPlatServices_common.C | 66 +++++++++++++++++----- .../prdf/common/plat/pegasus/prdfCenMarkstore.H | 10 +--- .../common/plat/pegasus/prdfCenMbaTdCtlr_common.C | 2 - .../prdf/common/plat/pegasus/prdfCenMemUtils.C | 35 ++++++++++++ .../diag/prdf/common/plat/pegasus/prdfCenMembuf.C | 19 +++++++ .../diag/prdf/common/plat/pegasus/prdfCenSymbol.C | 4 +- .../diag/prdf/common/plat/pegasus/prdfCenSymbol.H | 22 +++++++- .../diag/prdf/common/plat/pegasus/prdfMemoryMru.C | 11 +++- .../diag/prdf/common/plat/pegasus/prdfMemoryMru.H | 5 +- .../diag/prdf/common/plugins/prdfMemoryMruData.H | 8 ++- 10 files changed, 141 insertions(+), 41 deletions(-) (limited to 'src/usr/diag') diff --git a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C index f79f197b7..0288031b1 100755 --- a/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C +++ b/src/usr/diag/prdf/common/framework/service/prdfPlatServices_common.C @@ -413,23 +413,55 @@ int32_t mssGetMarkStore( TargetHandle_t i_mba, const CenRank & i_rank, int32_t o_rc = SUCCESS; - errlHndl_t errl = NULL; + do + { + errlHndl_t errl = NULL; + uint8_t symbolMark, chipMark; + PRD_FAPI_TO_ERRL( errl, mss_get_mark_store, getFapiTarget(i_mba), + i_rank.getMaster(), symbolMark, chipMark ); - uint8_t symbolMark, chipMark; - PRD_FAPI_TO_ERRL( errl, mss_get_mark_store, getFapiTarget(i_mba), - i_rank.getMaster(), symbolMark, chipMark ); + if ( NULL != errl ) + { + PRDF_ERR( PRDF_FUNC"mss_get_mark_store() failed. HUID: 0x%08x " + "rank: %d", getHuid(i_mba), i_rank.getMaster() ); + PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); + o_rc = FAIL; break; + } - if ( NULL != errl ) - { - PRDF_ERR( PRDF_FUNC"mss_get_mark_store() failed. HUID: 0x%08x rank: %d", - getHuid(i_mba), i_rank.getMaster() ); - PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT ); - o_rc = FAIL; - } - else - { - o_mark = CenMark( i_mba, i_rank, symbolMark, chipMark ); - } + CenSymbol sm = CenSymbol::fromSymbol( i_mba, i_rank, symbolMark ); + CenSymbol cm = CenSymbol::fromSymbol( i_mba, i_rank, chipMark ); + + // Check if the chip or symbol mark are on any of the spares. + CenSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux( i_mba, i_rank, sp0, sp1, ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed. HUID: 0x%08x " + "rank: %d", getHuid(i_mba), i_rank.getMaster() ); + break; + } + + if ( sp0.isValid() ) + { + if ( sp0.getDram() == sm.getDram() ) sm.setDramSpared(); + if ( sp0.getDram() == cm.getDram() ) cm.setDramSpared(); + } + + if ( sp1.isValid() ) + { + if ( sp1.getDram() == sm.getDram() ) sm.setDramSpared(); + if ( sp1.getDram() == cm.getDram() ) cm.setDramSpared(); + } + + if ( ecc.isValid() ) + { + if ( ecc.getDram() == sm.getDram() ) sm.setEccSpared(); + if ( ecc.getDram() == cm.getDram() ) cm.setEccSpared(); + } + + o_mark = CenMark( sm, cm ); + + } while (0); return o_rc; @@ -519,6 +551,10 @@ int32_t mssGetSteerMux( TargetHandle_t i_mba, const CenRank & i_rank, o_port0Spare = CenSymbol::fromSymbol( i_mba, i_rank, port0Spare ); o_port1Spare = CenSymbol::fromSymbol( i_mba, i_rank, port1Spare ); o_eccSpare = CenSymbol::fromSymbol( i_mba, i_rank, eccSpare ); + + if ( o_port0Spare.isValid() ) o_port0Spare.setDramSpared(); + if ( o_port1Spare.isValid() ) o_port1Spare.setDramSpared(); + if ( o_eccSpare.isValid() ) o_eccSpare.setEccSpared(); } return o_rc; diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMarkstore.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMarkstore.H index e015ceb5b..42d0752c5 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMarkstore.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMarkstore.H @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2013 */ +/* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ @@ -58,14 +58,6 @@ class CenMark iv_symbolMark(i_symbolMark), iv_chipMark(i_chipMark) {} - /** @brief Constructor from components */ - CenMark( TARGETING::TargetHandle_t i_mba, const CenRank & i_rank, - uint8_t i_symbolMark, uint8_t i_chipMark ) - { - iv_symbolMark = CenSymbol::fromSymbol( i_mba, i_rank, i_symbolMark ); - iv_chipMark = CenSymbol::fromSymbol( i_mba, i_rank, i_chipMark ); - } - public: // functions /** @return A symbol representing the symbol mark. */ diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C index ff116a44a..3601a6a8b 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_common.C @@ -480,7 +480,6 @@ int32_t CenMbaTdCtlrCommon::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ) // call it out and set it in VPD. MemoryMru memmru ( iv_mbaTrgt, iv_rank, iv_mark.getCM() ); - memmru.setDramSpared(); io_sc.service_data->SetCallout( memmru ); io_sc.service_data->SetServiceCall(); @@ -592,7 +591,6 @@ int32_t CenMbaTdCtlrCommon::handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ) // Callout spare DRAM. MemoryMru memmru ( iv_mbaTrgt, iv_rank, iv_mark.getCM() ); - memmru.setDramSpared(); io_sc.service_data->SetCallout( memmru ); // The spare DRAM is bad, so set it in VPD. At this point, the chip mark diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C index 2a36179fc..3b09433e8 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMemUtils.C @@ -89,6 +89,15 @@ int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank, bool isX4 = isDramWidthX4(mbaTrgt); + // Get the current spares on this rank. + CenSymbol sp0, sp1, ecc; + o_rc = mssGetSteerMux( mbaTrgt, i_rank, sp0, sp1, ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed." ); + break; + } + // Use this map to keep track of the total counts per DRAM. typedef std::map DramCount; DramCount dramCounts; @@ -137,6 +146,21 @@ int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank, } else { + // Check if this symbol is on any of the spares. + if ( ( sp0.isValid() && + (sp0.getDram() == symData.symbol.getDram()) ) || + ( sp1.isValid() && + (sp1.getDram() == symData.symbol.getDram()) ) ) + { + symData.symbol.setDramSpared(); + } + if ( ecc.isValid() && + (ecc.getDram() == symData.symbol.getDram()) ) + { + symData.symbol.setEccSpared(); + } + + // Add the symbol to the list. symData.count = count; o_maintStats.push_back( symData ); } @@ -163,6 +187,17 @@ int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank, uint8_t sym = CenSymbol::dram2Symbol( highestEntry->first, isX4 ); o_highestDram = CenSymbol::fromSymbol( mbaTrgt, i_rank, sym ); + // Check if this symbol is on any of the spares. + if ( ( sp0.isValid() && (sp0.getDram() == o_highestDram.getDram()) ) || + ( sp1.isValid() && (sp1.getDram() == o_highestDram.getDram()) ) ) + { + o_highestDram.setDramSpared(); + } + if ( ecc.isValid() && (ecc.getDram() == o_highestDram.getDram()) ) + { + o_highestDram.setEccSpared(); + } + } while(0); if ( SUCCESS != o_rc ) diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C index 9924ab215..0a8a1c083 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C @@ -556,6 +556,25 @@ int32_t AnalyzeFetchNce( ExtensibleChip * i_membChip, break; } + // Check if this symbol is on any of the spares. + CenSymbol sp0, sp1, ecc; + l_rc = mssGetSteerMux( mbaTrgt, rank, sp0, sp1, ecc ); + if ( SUCCESS != l_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed. HUID: 0x%08x " + "rank: %d", getHuid(mbaTrgt), rank.getMaster() ); + break; + } + if ( (sp0.isValid() && (sp0.getDram() == symbol.getDram())) || + (sp1.isValid() && (sp1.getDram() == symbol.getDram())) ) + { + symbol.setDramSpared(); + } + if ( ecc.isValid() && (ecc.getDram() == symbol.getDram()) ) + { + symbol.setEccSpared(); + } + // Add the DIMM to the callout list MemoryMru memmru ( mbaTrgt, rank, symbol ); i_sc.service_data->SetCallout( memmru ); diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C index 93f752571..bc4f5c172 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.C @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2013 */ +/* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ @@ -293,7 +293,7 @@ int32_t CenSymbol::getSymbol( const CenRank & i_rank, WiringType i_wiringType, uint8_t i_dimmDq, uint8_t i_portSlct, uint8_t & o_symbol ) { - #define PRDF_FUNC "[CenSymbol::fromDq] " + #define PRDF_FUNC "[CenSymbol::getSymbol] " int32_t o_rc = SUCCESS; diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H index 85859f583..d738a9671 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenSymbol.H @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2013 */ +/* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ @@ -73,7 +73,8 @@ class CenSymbol */ CenSymbol() : iv_mbaTarget(NULL), iv_rank(), iv_wiringType(WIRING_INVALID), - iv_symbol(SYMBOLS_PER_RANK), iv_pins(NO_SYMBOL_DQS) + iv_symbol(SYMBOLS_PER_RANK), iv_pins(NO_SYMBOL_DQS), iv_x4Dram(false), + iv_isDramSpared(false), iv_isEccSpared(false) {} private: // constructor @@ -85,7 +86,8 @@ class CenSymbol WiringType i_wiringType, uint8_t i_symbol, uint8_t i_pins, bool i_x4Dram ) : iv_mbaTarget(i_mba), iv_rank(i_rank), iv_wiringType(i_wiringType), - iv_symbol(i_symbol), iv_pins(i_pins), iv_x4Dram(i_x4Dram) + iv_symbol(i_symbol), iv_pins(i_pins), iv_x4Dram(i_x4Dram), + iv_isDramSpared(false), iv_isEccSpared(false) {} public: // functions @@ -191,6 +193,18 @@ class CenSymbol /** @return TRUE this symbol is on a x4 DRAM, FALSE otherwise. */ bool isX4Dram() const { return iv_x4Dram; } + /** @return Marks this symbol as steered to a DRAM spare. */ + void setDramSpared() { iv_isDramSpared = true; } + + /** @return Marks this symbol as steered to the ECC spare. */ + void setEccSpared() { iv_isEccSpared = true; } + + /** @return TRUE this symbol is on a DRAM spare, FALSE otherwise. */ + bool isDramSpared() const { return iv_isDramSpared; } + + /** @return TRUE this symbol is on the ECC spare, FALSE otherwise. */ + bool isEccSpared() const { return iv_isEccSpared; } + /** @return The symbol of the given Centaur DQ and port select. */ static uint8_t cenDq2Symbol( uint8_t i_CenDq, uint8_t i_ps ); @@ -256,6 +270,8 @@ class CenSymbol uint8_t iv_symbol; ///< This symbol's numerical value. uint8_t iv_pins; ///< See enum DqMask. bool iv_x4Dram; ///< TRUE x4 DRAM, FALSE x8 DRAM. + bool iv_isDramSpared; ///< TRUE if symbol resides on DRAM spare. + bool iv_isEccSpared; ///< TRUE if symbol resides on ECC spare. }; } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.C b/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.C index fff1d3146..fa3b84187 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.C @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2008,2013 */ +/* COPYRIGHT International Business Machines Corp. 2008,2014 */ /* */ /* p1 */ /* */ @@ -133,10 +133,13 @@ MemoryMru::MemoryMru( uint32_t i_memMru ) : break; } + if ( iv_memMruMeld.s.dramSpared ) iv_symbol.setDramSpared(); + if ( iv_memMruMeld.s.eccSpared ) iv_symbol.setEccSpared(); + // Validation checks CenSymbol::WiringType type = iv_symbol.getWiringType(); - if ( type != CenSymbol::WiringType (iv_memMruMeld.s.wiringType)) + if ( type != CenSymbol::WiringType(iv_memMruMeld.s.wiringType) ) { PRDF_ERR( PRDF_FUNC"Wiring Type does not match type:%u " "iv_memMruMeld.s.wiringType :%u", @@ -201,7 +204,8 @@ MemoryMru::MemoryMru( TARGETING::TargetHandle_t i_mbaTarget, iv_memMruMeld.s.srankValid = iv_rank.isSlaveValid(); iv_memMruMeld.s.symbol = iv_symbol.getSymbol(); iv_memMruMeld.s.pins = iv_symbol.getPins(); - iv_memMruMeld.s.dramSpared = 0; // manually set by setDramSpared() + iv_memMruMeld.s.dramSpared = iv_symbol.isDramSpared() ? 1 : 0; + iv_memMruMeld.s.eccSpared = iv_symbol.isEccSpared() ? 1 : 0; iv_memMruMeld.s.wiringType = iv_symbol.getWiringType(); // If the code gets to this point the MemoryMru is valid. @@ -306,6 +310,7 @@ TargetHandleList MemoryMru::getCalloutList() const { // Add DIMM represented by symbol uint8_t ps = iv_symbol.getPortSlct(); + if ( iv_memMruMeld.s.eccSpared ) ps = 1; // Adjust for ECC spare o_list = CalloutUtil::getConnectedDimms( iv_mbaTarget, iv_rank, ps ); } diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.H b/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.H index 448649385..d38b39083 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfMemoryMru.H @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2008,2013 */ +/* COPYRIGHT International Business Machines Corp. 2008,2014 */ /* */ /* p1 */ /* */ @@ -71,9 +71,6 @@ class MemoryMru MemoryMru( TARGETING::TargetHandle_t i_mbaTarget, const CenRank & i_rank, MemoryMruData::Callout i_specialCallout ); - /** @brief Indicates that the symbol actually resides on the DRAM spare. */ - void setDramSpared() { iv_memMruMeld.s.dramSpared = 1; } - public: // functions /** @return The 32-bit representation of this MemoryMru. */ diff --git a/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H b/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H index 92a3631ef..dd76fb177 100644 --- a/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H +++ b/src/usr/diag/prdf/common/plugins/prdfMemoryMruData.H @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2013 */ +/* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ @@ -99,8 +99,9 @@ union MemMruMeld uint32_t dramSpared : 1; ///< True if symbol is on spared DRAM uint32_t symbol : 7; ///< Symbol or SpecialCallout + uint32_t eccSpared : 1; ///< True if symbol is on ECC DRAM // TODO: RTC 67376 Check if width for wiring type is enough - uint32_t wiringType : 4; ///< Wiring type + uint32_t wiringType : 3; ///< Wiring type uint32_t srankValid : 1; ///< TRUE if slave rank is valid uint32_t srank : 3; ///< Slave rank (0-7) #else @@ -108,7 +109,8 @@ union MemMruMeld // simulator. uint32_t srank : 3; uint32_t srankValid : 1; - uint32_t wiringType : 4; + uint32_t wiringType : 3; + uint32_t eccSpared : 1; uint32_t symbol : 7; uint32_t dramSpared : 1; uint32_t mrank : 3; -- cgit v1.2.1