diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C | 126 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H | 20 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H | 5 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemVcm.C | 214 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemVcm.H | 13 |
5 files changed, 334 insertions, 44 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C index 4efb13f4e..20c3eca3e 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C @@ -212,6 +212,45 @@ int32_t MemDqBitmap<T>::setDram( const MemSymbol & i_symbol, uint8_t i_pins ) //------------------------------------------------------------------------------ template <DIMMS_PER_RANK T> +uint32_t MemDqBitmap<T>::clearDram( const MemSymbol & i_symbol, uint8_t i_pins ) +{ + #define PRDF_FUNC "[MemDqBitmap::clearDram] " + + uint32_t o_rc = SUCCESS; + + do + { + uint8_t portSlct, byteIdx, bitIdx; + o_rc = getPortByteBitIdx( i_symbol, portSlct, byteIdx, bitIdx ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getPortByteBitIdx() failed" ); + break; + } + + if ( iv_x4Dram ) + { + i_pins &= 0xf; // limit to 4 bits + uint32_t shift = (DQS_PER_BYTE-1) - bitIdx; + shift = (shift / DQS_PER_NIBBLE) * DQS_PER_NIBBLE; // 0,4 + iv_data[portSlct][byteIdx] &= ~(i_pins << shift); + } + else + { + i_pins &= 0xff; // limit to 8 bits + iv_data[portSlct][byteIdx] &= ~(i_pins); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template <DIMMS_PER_RANK T> void MemDqBitmap<T>::getCaptureData( CaptureData & o_cd ) const { uint8_t rank = iv_rank.getMaster(); @@ -654,26 +693,24 @@ template class MemDqBitmap<DIMMS_PER_RANK::MBA>; // Utility Functions //############################################################################## -template<> -uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, - const MemRank & i_rank, - MemSymbol i_symbol ) +template<TARGETING::TYPE T, DIMMS_PER_RANK D> +uint32_t __setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, + MemSymbol i_symbol ) { - #define PRDF_FUNC "[MemDqBitmap::setDramInVpd] " + #define PRDF_FUNC "[MemDqBitmap::__setDramInVpd] " 
uint32_t o_rc = SUCCESS; do { + TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - TARGETING::TargetHandle_t mcaTrgt = i_chip->getTrgt(); - - MemDqBitmap<DIMMS_PER_RANK::MCA> dqBitmap; - o_rc = getBadDqBitmap<DIMMS_PER_RANK::MCA>( mcaTrgt, i_rank, dqBitmap ); + MemDqBitmap<D> dqBitmap; + o_rc = getBadDqBitmap<D>( trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "getBadDqBitmap<DIMMS_PER_RANK::MCA>(0x%08x, " - "0x%02x) failed.", getHuid(mcaTrgt), i_rank.getKey() ); + PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", + getHuid(trgt), i_rank.getKey() ); break; } @@ -684,11 +721,11 @@ uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, break; } - o_rc = setBadDqBitmap<DIMMS_PER_RANK::MCA>( mcaTrgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap<D>( trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "setBadDqBitmap<DIMMS_PER_RANK::MCA>(0x%08x, " - "0x%02x) failed.", getHuid(mcaTrgt), i_rank.getKey() ); + PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", + getHuid(trgt), i_rank.getKey() ); break; } }while(0); @@ -698,43 +735,60 @@ uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } -//------------------------------------------------------------------------------ template<> uint32_t setDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, MemSymbol i_symbol ) { - #define PRDF_FUNC "[MemDqBitmap::setDramInVpd] " + return __setDramInVpd<TARGETING::TYPE_MBA, DIMMS_PER_RANK::MBA>(i_chip, + i_rank, i_symbol); +} + +template<> +uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol i_symbol ) +{ + return __setDramInVpd<TARGETING::TYPE_MCA, DIMMS_PER_RANK::MCA>(i_chip, + i_rank, i_symbol); +} + +//------------------------------------------------------------------------------ + +template<TARGETING::TYPE T, DIMMS_PER_RANK D> +uint32_t __clearDramInVpd( ExtensibleChip * i_chip, const 
MemRank & i_rank, + MemSymbol i_symbol ) +{ + #define PRDF_FUNC "[MemDqBitmap::__clearDramInVpd] " uint32_t o_rc = SUCCESS; do { + TARGETING::TargetHandle_t trgt = i_chip->getTrgt(); - TARGETING::TargetHandle_t mbaTrgt = i_chip->getTrgt(); - - MemDqBitmap<DIMMS_PER_RANK::MBA> dqBitmap; - o_rc = getBadDqBitmap<DIMMS_PER_RANK::MBA>( mbaTrgt, i_rank, dqBitmap ); + MemDqBitmap<D> dqBitmap; + o_rc = getBadDqBitmap<D>( trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "getBadDqBitmap<DIMMS_PER_RANK::MBA>(0x%08x, " - "0x%02x) failed.", getHuid(mbaTrgt), i_rank.getKey() ); + PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.", + getHuid(trgt), i_rank.getKey() ); break; } - o_rc = dqBitmap.setDram( i_symbol ); + o_rc = dqBitmap.clearDram( i_symbol ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "setDram() failed." ); + PRDF_ERR( PRDF_FUNC "clearDram() failed." ); break; } - o_rc = setBadDqBitmap<DIMMS_PER_RANK::MBA>( mbaTrgt, i_rank, dqBitmap ); + o_rc = setBadDqBitmap<D>( trgt, i_rank, dqBitmap ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "setBadDqBitmap<DIMMS_PER_RANK::MBA>(0x%08x, " - "0x%02x) failed.", getHuid(mbaTrgt), i_rank.getKey() ); + PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.", + getHuid(trgt), i_rank.getKey() ); break; } }while(0); @@ -744,6 +798,24 @@ uint32_t setDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +template<> +uint32_t clearDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol i_symbol ) +{ + return __clearDramInVpd<TARGETING::TYPE_MCA, DIMMS_PER_RANK::MCA>(i_chip, + i_rank, i_symbol); +} + +template<> +uint32_t clearDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + MemSymbol i_symbol ) +{ + return __clearDramInVpd<TARGETING::TYPE_MBA, DIMMS_PER_RANK::MBA>(i_chip, + i_rank, i_symbol); +} + //------------------------------------------------------------------------------ } // end namespace 
PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H index 390e6d3b7..14ba29d1a 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H @@ -127,6 +127,16 @@ class MemDqBitmap int32_t setDram( const MemSymbol & i_symbol, uint8_t i_pins = 0xff ); /** + * @brief Clears the specified DRAM. + * @note Will adjust for DRAM or ECC spares, if applicable. + * @param i_symbol A symbol on the target DRAM. + * @param i_pins Optional 8-bit (x8 mode) or 4-bit (x4 mode) value of the + * DRAM's pins. The default is to clear all pins. + * @return Non-SUCCESS if an internal function failed, SUCCESS otherwise. + */ + uint32_t clearDram( const MemSymbol & i_symbol, uint8_t i_pins = 0xff ); + + /** * @brief Adds the bitmaps for both ports to the capture data. * @param o_cd Capture data struct. */ @@ -220,6 +230,16 @@ template<TARGETING::TYPE T> uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, MemSymbol i_symbol ); +/** + * @brief Clears the DRAM containing i_symbol from the DRAM repairs VPD. + * @param i_chip MBA or MCA chip. + * @param i_rank Target rank. + * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise. 
+ */ +template<TARGETING::TYPE T> +uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank, + MemSymbol i_symbol ); + } // end namespace PRDF #endif // __prdfMemDqBitmap_H diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H index 836ff3af9..0557ed2a6 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H @@ -47,6 +47,11 @@ PRDR_ERROR_SIGNATURE(MaintRETRY_CTE, 0xffff0016, "", "Maintenance RETRY CTE"); PRDR_ERROR_SIGNATURE(VcmVerified, 0xffff0020, "", "VCM: verified"); PRDR_ERROR_SIGNATURE(VcmFalseAlarm, 0xffff0021, "", "VCM: false alarm"); PRDR_ERROR_SIGNATURE(VcmFalseAlarmTH, 0xffff0022, "", "VCM: false alarm threshold"); +PRDR_ERROR_SIGNATURE(VcmVerSameDram, 0xffff0023, "", "VCM: verified: previous PPR on same DRAM"); +PRDR_ERROR_SIGNATURE(VcmVerDiffDram, 0xffff0024, "", "VCM: verified: previous PPR on different DRAM"); +PRDR_ERROR_SIGNATURE(VcmVerFirstMce, 0xffff0025, "", "VCM: verified: first MCE"); +PRDR_ERROR_SIGNATURE(VcmVerSecMce, 0xffff0026, "", "VCM: verified: second MCE"); +PRDR_ERROR_SIGNATURE(VcmVerRowFail, 0xffff0027, "", "VCM: verified: common row fail"); PRDR_ERROR_SIGNATURE(AllDramRepairs, 0xffff002F, "", "all DRAM repairs used"); PRDR_ERROR_SIGNATURE(RdrInternalFail, 0xffff0040, "", "RDR: Internal failure"); diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C index e0feeb362..5009b6aa6 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C @@ -112,10 +112,169 @@ uint32_t VcmEvent<TYPE_MBA>::rowRepair( STEP_CODE_DATA_STRUCT & io_sc, { #define PRDF_FUNC "[VcmEvent::rowRepair] " + PRDF_ASSERT( iv_rowRepairEnabled ); + uint32_t o_rc = SUCCESS; do { + // get port select + uint8_t l_ps = iv_mark.getSymbol().getPortSlct(); + + // get if the spares are available + bool l_spAvail, l_eccAvail; + o_rc = 
PlatServices::isSpareAvailable<TYPE_MBA>( iv_chip->getTrgt(), + iv_rank, l_ps, l_spAvail, l_eccAvail ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "isSpareAvailable(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + // get dimm + TARGETING::TargetHandle_t l_dimm = + PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank, + l_ps ); + + // If scrub stops on first MCE, and static row repair + // not supported or both spare and chip mark used + if ( 1 == iv_mceCount && ( !l_spAvail && !l_eccAvail ) ) + { + // Record bad DQs in VPD - done when verified() + // No need to continue scrubbing, VCM verified, VCM done. + o_done = true; + } + // Else if scrub stops on first MCE and static row repair + // supported + else if ( 1 == iv_mceCount ) + { + MemRowRepair l_rowRepair; + o_rc = getRowRepairData<TYPE_MBA>( l_dimm, iv_rank, l_rowRepair ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getRowRepairData(0x%08x, 0x%02x) failed", + PlatServices::getHuid(l_dimm), iv_rank.getKey() ); + break; + } + + // If the port, dimm, master rank has previous row repair in VPD + if ( l_rowRepair.isValid() ) + { + // If previous repair for same DRAM + if ( l_rowRepair.getRowRepairDram() == + iv_mark.getSymbol().getDram() ) + { + // Clear previous row repair from VPD + o_rc = clearRowRepairData<TYPE_MBA>( l_dimm, iv_rank ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "clearRowRepairData" + "(0x%08x, 0x%02x) failed", + PlatServices::getHuid(l_dimm), + iv_rank.getKey() ); + break; + } + + // Record bad DQs in VPD - done when verified() + // Signature: "VCM: verified: previous PPR on same DRAM" + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_VcmVerSameDram ); + + // No need to continue scrubbing, VCM verified, VCM done + o_done = true; + } + // Else if previous repair for different DRAM + else + { + // Leave previous row repair in VPD + // Record bad DQs in VPD - done when verified() + // Signature:"VCM: verified: previous PPR on + // different DRAM" + 
io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_VcmVerDiffDram ); + + // No need to continue scrubbing, VCM verified, VCM done + o_done = true; + } + } + // Else if no previous row repair + else + { + // Signature: "VCM: verified: first MCE" + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_VcmVerFirstMce ); + + // Record bad DQs in VPD - done when verified() + // Remember address + // (kept in iv_rowRepairFailAddr for rowRepairEndRank()) + o_rc = getMemMaintAddr<TYPE_MBA>( iv_chip, + iv_rowRepairFailAddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + iv_chip->getHuid() ); + break; + } + + // Continue scrub, don't set procedure to done + } + } + // Else if scrub stops on second MCE + else if ( iv_mceCount > 1 ) + { + // Since at least 2 bad rows, don't bother with row repair + // No need to continue scrubbing, VCM verified, VCM done + o_done = true; + + // Signature: "VCM: verified: second MCE" + io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_VcmVerSecMce ); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +template<> +uint32_t VcmEvent<TYPE_MBA>::rowRepairEndRank( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[VcmEvent::rowRepairEndRank] " + + PRDF_ASSERT( !iv_canResumeScrub ); + PRDF_ASSERT( iv_rowRepairEnabled ); + PRDF_ASSERT( 0 != iv_mceCount ); + + uint32_t o_rc = SUCCESS; + + do + { + // get dimm + uint8_t l_ps = iv_mark.getSymbol().getPortSlct(); + TARGETING::TargetHandle_t l_dimm = + PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank, + l_ps ); + + // If scrub gets to the end of the master rank with an MCE + // Update VPD with row repair + o_rc = setRowRepairData<TYPE_MBA>( l_dimm, iv_rank, + iv_rowRepairFailAddr, iv_mark.getSymbol().getDram() ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "setRowRepairData(0x%08x, 0x%02x) " + "failed", PlatServices::getHuid(l_dimm), + iv_rank.getKey() ); + break; + } + + // Signature: "VCM: 
verified: common row fail" + 
io_sc.service_data->setSignature( iv_chip->getHuid(), + PRDFSIG_VcmVerRowFail ); + + // VCM verified, VCM done } while (0); @@ -196,41 +355,64 @@ uint32_t VcmEvent<TYPE_MBA>::handlePhaseComplete( const uint32_t & i_eccAttns, { if ( i_eccAttns & MAINT_MCE ) { - if ( iv_rowRepairEnabled ) + iv_mceCount++; + + // Only need to call verified on the first mce we hit + if ( 1 == iv_mceCount ) { - o_rc = rowRepair( io_sc, o_done ); + o_rc = verified( io_sc ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x", + PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x", iv_chip->getHuid() ); break; } } - else + + if ( iv_rowRepairEnabled ) { - o_rc = verified( io_sc ); + o_rc = rowRepair( io_sc, o_done ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x", + PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x", iv_chip->getHuid() ); break; } - + if ( o_done ) break; + } + else + { o_done = true; // Procedure is complete. + break; } } - else if ( !iv_canResumeScrub ) + + if ( !iv_canResumeScrub ) { - // The chip mark is not verified and the command has reached the - // end of the rank. So this is a false alarm. - o_rc = falseAlarm( io_sc ); - if ( SUCCESS != o_rc ) + // If row repair is enabled, we reached the end of the rank, and + // we got an MCE, we need to apply the row repair. + if ( iv_rowRepairEnabled && 0 != iv_mceCount ) { - PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x", - iv_chip->getHuid() ); - break; + o_rc = rowRepairEndRank( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "rowRepairEndRank() failed on " + "0x%08x", iv_chip->getHuid() ); + break; + } + } + else + { + // The chip mark is not verified and the command has reached + // the end of the rank. So this is a false alarm. + o_rc = falseAlarm( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x", + iv_chip->getHuid() ); + break; + } } - o_done = true; // Procedure is complete. 
} } diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H index 0bf7cfbb9..92c8d3b54 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H @@ -29,9 +29,11 @@ #define __prdfMemVcm_H // Platform includes +#include <prdfErrlUtil.H> #include <prdfMemDbUtils.H> #include <prdfMemEccAnalysis.H> #include <prdfMemMark.H> +#include <prdfMemRowRepair.H> #include <prdfMemScrubUtils.H> #include <prdfMemTdFalseAlarm.H> #include <prdfMemTdQueue.H> @@ -315,11 +317,20 @@ class VcmEvent : public TdEntry */ uint32_t rowRepair( STEP_CODE_DATA_STRUCT & io_sc, bool & o_done ); + /** + * @brief Do extra analysis needed for Row Repair once scrub has reached + * the end of the rank. + * @param io_sc The step code data struct. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t rowRepairEndRank( STEP_CODE_DATA_STRUCT & io_sc ); + private: // instance variables const MemMark iv_mark; ///< The chip mark from hardware. - const bool iv_rowRepairEnabled; ///< True if Row Repair is enabled. + uint8_t iv_mceCount = 0; ///< MCEs hit count, currently for Row Repair only. + MemAddr iv_rowRepairFailAddr; ///< Address stored to apply Row Repair on. }; //------------------------------------------------------------------------------ |