summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C | 126
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H | 20
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H | 5
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemVcm.C | 214
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemVcm.H | 13
5 files changed, 334 insertions, 44 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C
index 4efb13f4e..20c3eca3e 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.C
@@ -212,6 +212,45 @@ int32_t MemDqBitmap<T>::setDram( const MemSymbol & i_symbol, uint8_t i_pins )
//------------------------------------------------------------------------------
template <DIMMS_PER_RANK T>
+uint32_t MemDqBitmap<T>::clearDram( const MemSymbol & i_symbol, uint8_t i_pins )
+{
+ #define PRDF_FUNC "[MemDqBitmap::clearDram] "
+
+ // Clears the requested pins of the DRAM containing i_symbol in iv_data.
+ // Returns non-SUCCESS if the symbol cannot be mapped to a port/byte/bit.
+
+ // Declared with the function's return type (the sibling VPD helpers in
+ // this file use uint32_t return codes as well).
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ uint8_t portSlct, byteIdx, bitIdx;
+ o_rc = getPortByteBitIdx( i_symbol, portSlct, byteIdx, bitIdx );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getPortByteBitIdx() failed" );
+ break;
+ }
+
+ if ( iv_x4Dram )
+ {
+ i_pins &= 0xf; // limit to 4 bits
+ // Move the 4-bit mask onto the nibble that holds bitIdx.
+ // NOTE(review): assumes bitIdx < DQS_PER_BYTE, otherwise the
+ // unsigned subtraction below wraps - confirm against
+ // getPortByteBitIdx().
+ uint32_t shift = (DQS_PER_BYTE-1) - bitIdx;
+ shift = (shift / DQS_PER_NIBBLE) * DQS_PER_NIBBLE; // 0,4
+ iv_data[portSlct][byteIdx] &= ~(i_pins << shift);
+ }
+ else
+ {
+ i_pins &= 0xff; // limit to 8 bits
+ iv_data[portSlct][byteIdx] &= ~(i_pins);
+ }
+
+ } while (0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+template <DIMMS_PER_RANK T>
void MemDqBitmap<T>::getCaptureData( CaptureData & o_cd ) const
{
uint8_t rank = iv_rank.getMaster();
@@ -654,26 +693,24 @@ template class MemDqBitmap<DIMMS_PER_RANK::MBA>;
// Utility Functions
//##############################################################################
-template<>
-uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
- const MemRank & i_rank,
- MemSymbol i_symbol )
+template<TARGETING::TYPE T, DIMMS_PER_RANK D>
+uint32_t __setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank,
+ MemSymbol i_symbol )
{
- #define PRDF_FUNC "[MemDqBitmap::setDramInVpd] "
+ #define PRDF_FUNC "[MemDqBitmap::__setDramInVpd] "
uint32_t o_rc = SUCCESS;
do
{
+ TARGETING::TargetHandle_t trgt = i_chip->getTrgt();
- TARGETING::TargetHandle_t mcaTrgt = i_chip->getTrgt();
-
- MemDqBitmap<DIMMS_PER_RANK::MCA> dqBitmap;
- o_rc = getBadDqBitmap<DIMMS_PER_RANK::MCA>( mcaTrgt, i_rank, dqBitmap );
+ MemDqBitmap<D> dqBitmap;
+ o_rc = getBadDqBitmap<D>( trgt, i_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "getBadDqBitmap<DIMMS_PER_RANK::MCA>(0x%08x, "
- "0x%02x) failed.", getHuid(mcaTrgt), i_rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.",
+ getHuid(trgt), i_rank.getKey() );
break;
}
@@ -684,11 +721,11 @@ uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
break;
}
- o_rc = setBadDqBitmap<DIMMS_PER_RANK::MCA>( mcaTrgt, i_rank, dqBitmap );
+ o_rc = setBadDqBitmap<D>( trgt, i_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "setBadDqBitmap<DIMMS_PER_RANK::MCA>(0x%08x, "
- "0x%02x) failed.", getHuid(mcaTrgt), i_rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.",
+ getHuid(trgt), i_rank.getKey() );
break;
}
}while(0);
@@ -698,43 +735,60 @@ uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
-//------------------------------------------------------------------------------
template<>
uint32_t setDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank,
MemSymbol i_symbol )
{
- #define PRDF_FUNC "[MemDqBitmap::setDramInVpd] "
+ // MBA specialization: forwards to the shared helper, selecting the
+ // DIMMS_PER_RANK::MBA bitmap type.
+ return __setDramInVpd<TARGETING::TYPE_MBA, DIMMS_PER_RANK::MBA>(i_chip,
+ i_rank, i_symbol);
+}
+
+template<>
+uint32_t setDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ MemSymbol i_symbol )
+{
+ // MCA specialization: forwards to the shared helper, selecting the
+ // DIMMS_PER_RANK::MCA bitmap type.
+ return __setDramInVpd<TARGETING::TYPE_MCA, DIMMS_PER_RANK::MCA>(i_chip,
+ i_rank, i_symbol);
+}
+
+//------------------------------------------------------------------------------
+
+template<TARGETING::TYPE T, DIMMS_PER_RANK D>
+uint32_t __clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank,
+ MemSymbol i_symbol )
+{
+ #define PRDF_FUNC "[MemDqBitmap::__clearDramInVpd] "
uint32_t o_rc = SUCCESS;
do
{
+ TARGETING::TargetHandle_t trgt = i_chip->getTrgt();
- TARGETING::TargetHandle_t mbaTrgt = i_chip->getTrgt();
-
- MemDqBitmap<DIMMS_PER_RANK::MBA> dqBitmap;
- o_rc = getBadDqBitmap<DIMMS_PER_RANK::MBA>( mbaTrgt, i_rank, dqBitmap );
+ MemDqBitmap<D> dqBitmap;
+ o_rc = getBadDqBitmap<D>( trgt, i_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "getBadDqBitmap<DIMMS_PER_RANK::MBA>(0x%08x, "
- "0x%02x) failed.", getHuid(mbaTrgt), i_rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "getBadDqBitmap(0x%08x, 0x%02x) failed.",
+ getHuid(trgt), i_rank.getKey() );
break;
}
- o_rc = dqBitmap.setDram( i_symbol );
+ o_rc = dqBitmap.clearDram( i_symbol );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "setDram() failed." );
+ PRDF_ERR( PRDF_FUNC "clearDram() failed." );
break;
}
- o_rc = setBadDqBitmap<DIMMS_PER_RANK::MBA>( mbaTrgt, i_rank, dqBitmap );
+ o_rc = setBadDqBitmap<D>( trgt, i_rank, dqBitmap );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "setBadDqBitmap<DIMMS_PER_RANK::MBA>(0x%08x, "
- "0x%02x) failed.", getHuid(mbaTrgt), i_rank.getKey() );
+ PRDF_ERR( PRDF_FUNC "setBadDqBitmap(0x%08x, 0x%02x) failed.",
+ getHuid(trgt), i_rank.getKey() );
break;
}
}while(0);
@@ -744,6 +798,24 @@ uint32_t setDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
+template<>
+uint32_t clearDramInVpd<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ MemSymbol i_symbol )
+{
+ // MCA specialization: forwards to the shared helper, selecting the
+ // DIMMS_PER_RANK::MCA bitmap type.
+ return __clearDramInVpd<TARGETING::TYPE_MCA, DIMMS_PER_RANK::MCA>(i_chip,
+ i_rank, i_symbol);
+}
+
+template<>
+uint32_t clearDramInVpd<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ MemSymbol i_symbol )
+{
+ // MBA specialization: forwards to the shared helper, selecting the
+ // DIMMS_PER_RANK::MBA bitmap type.
+ return __clearDramInVpd<TARGETING::TYPE_MBA, DIMMS_PER_RANK::MBA>(i_chip,
+ i_rank, i_symbol);
+}
+
//------------------------------------------------------------------------------
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H
index 390e6d3b7..14ba29d1a 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDqBitmap.H
@@ -127,6 +127,16 @@ class MemDqBitmap
int32_t setDram( const MemSymbol & i_symbol, uint8_t i_pins = 0xff );
/**
+ * @brief Clears the specified DRAM.
+ * @note Will adjust for DRAM or ECC spares, if applicable.
+ * @note The bits are cleared in this object's bitmap data.
+ * @param i_symbol A symbol on the target DRAM.
+ * @param i_pins Optional 8-bit (x8 mode) or 4-bit (x4 mode) value of the
+ * DRAM's pins. The default is to clear all pins. In x4 mode
+ * only the low 4 bits of this value are used.
+ * @return Non-SUCCESS if an internal function failed, SUCCESS otherwise.
+ */
+ uint32_t clearDram( const MemSymbol & i_symbol, uint8_t i_pins = 0xff );
+
+ /**
* @brief Adds the bitmaps for both ports to the capture data.
* @param o_cd Capture data struct.
*/
@@ -220,6 +230,16 @@ template<TARGETING::TYPE T>
uint32_t setDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank,
MemSymbol i_symbol );
+/**
+ * @brief Clears the DRAM containing the given symbol in the DRAM repairs VPD.
+ * @param i_chip MBA or MCA chip.
+ * @param i_rank Target rank.
+ * @param i_symbol A symbol on the target DRAM.
+ * @return Non-SUCCESS if an internal function fails. SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t clearDramInVpd( ExtensibleChip * i_chip, const MemRank & i_rank,
+ MemSymbol i_symbol );
+
} // end namespace PRDF
#endif // __prdfMemDqBitmap_H
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
index 836ff3af9..0557ed2a6 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
@@ -47,6 +47,11 @@ PRDR_ERROR_SIGNATURE(MaintRETRY_CTE, 0xffff0016, "", "Maintenance RETRY CTE");
PRDR_ERROR_SIGNATURE(VcmVerified, 0xffff0020, "", "VCM: verified");
PRDR_ERROR_SIGNATURE(VcmFalseAlarm, 0xffff0021, "", "VCM: false alarm");
PRDR_ERROR_SIGNATURE(VcmFalseAlarmTH, 0xffff0022, "", "VCM: false alarm threshold");
+PRDR_ERROR_SIGNATURE(VcmVerSameDram, 0xffff0023, "", "VCM: verified: previous PPR on same DRAM");
+PRDR_ERROR_SIGNATURE(VcmVerDiffDram, 0xffff0024, "", "VCM: verified: previous PPR on different DRAM");
+PRDR_ERROR_SIGNATURE(VcmVerFirstMce, 0xffff0025, "", "VCM: verified: first MCE");
+PRDR_ERROR_SIGNATURE(VcmVerSecMce, 0xffff0026, "", "VCM: verified: second MCE");
+PRDR_ERROR_SIGNATURE(VcmVerRowFail, 0xffff0027, "", "VCM: verified: common row fail");
PRDR_ERROR_SIGNATURE(AllDramRepairs, 0xffff002F, "", "all DRAM repairs used");
PRDR_ERROR_SIGNATURE(RdrInternalFail, 0xffff0040, "", "RDR: Internal failure");
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
index e0feeb362..5009b6aa6 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
@@ -112,10 +112,169 @@ uint32_t VcmEvent<TYPE_MBA>::rowRepair( STEP_CODE_DATA_STRUCT & io_sc,
{
#define PRDF_FUNC "[VcmEvent::rowRepair] "
+ // Row repair analysis for an MCE seen by this procedure. Based on
+ // iv_mceCount, spare availability and any row repair already stored in
+ // VPD, this either sets o_done to true (procedure finished) or leaves
+ // o_done untouched so the scrub continues.
+ // Returns non-SUCCESS if an internal function fails, SUCCESS otherwise.
+
+ PRDF_ASSERT( iv_rowRepairEnabled );
+
uint32_t o_rc = SUCCESS;
do
{
+ // get port select
+ uint8_t l_ps = iv_mark.getSymbol().getPortSlct();
+
+ // get if the spares are available
+ bool l_spAvail, l_eccAvail;
+ o_rc = PlatServices::isSpareAvailable<TYPE_MBA>( iv_chip->getTrgt(),
+ iv_rank, l_ps, l_spAvail, l_eccAvail );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "isSpareAvailable(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+
+ // get dimm
+ TARGETING::TargetHandle_t l_dimm =
+ PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank,
+ l_ps );
+
+ // If scrub stops on first MCE and neither l_spAvail nor l_eccAvail is
+ // set (original note: static row repair not supported or both spare
+ // and chip mark used)
+ if ( 1 == iv_mceCount && ( !l_spAvail && !l_eccAvail ) )
+ {
+ // Record bad DQs in VPD - done when verified()
+ // No need to continue scrubbing, VCM verified, VCM done.
+ o_done = true;
+ }
+ // Else if scrub stops on first MCE and static row repair
+ // supported
+ else if ( 1 == iv_mceCount )
+ {
+ MemRowRepair l_rowRepair;
+ o_rc = getRowRepairData<TYPE_MBA>( l_dimm, iv_rank, l_rowRepair );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getRowRepairData(0x%08x, 0x%02x) failed",
+ PlatServices::getHuid(l_dimm), iv_rank.getKey() );
+ break;
+ }
+
+ // If the port, dimm, master rank has previous row repair in VPD
+ if ( l_rowRepair.isValid() )
+ {
+ // If previous repair for same DRAM
+ if ( l_rowRepair.getRowRepairDram() ==
+ iv_mark.getSymbol().getDram() )
+ {
+ // Clear previous row repair from VPD
+ o_rc = clearRowRepairData<TYPE_MBA>( l_dimm, iv_rank );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearRowRepairData"
+ "(0x%08x, 0x%02x) failed",
+ PlatServices::getHuid(l_dimm),
+ iv_rank.getKey() );
+ break;
+ }
+
+ // Record bad DQs in VPD - done when verified()
+ // Signature: "VCM: verified: previous PPR on same DRAM"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerSameDram );
+
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+ }
+ // Else if previous repair for different DRAM
+ else
+ {
+ // Leave previous row repair in VPD
+ // Record bad DQs in VPD - done when verified()
+ // Signature:"VCM: verified: previous PPR on
+ // different DRAM"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerDiffDram );
+
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+ }
+ }
+ // Else if no previous row repair
+ else
+ {
+ // Signature: "VCM: verified: first MCE"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerFirstMce );
+
+ // Record bad DQs in VPD - done when verified()
+ // Remember address: stored in iv_rowRepairFailAddr so it is still
+ // available after this call returns.
+ o_rc = getMemMaintAddr<TYPE_MBA>( iv_chip,
+ iv_rowRepairFailAddr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+
+ // Continue scrub, don't set procedure to done
+ }
+ }
+ // Else if scrub stops on second MCE
+ else if ( iv_mceCount > 1 )
+ {
+ // Since at least 2 bad rows, don't bother with row repair
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+
+ // Signature: "VCM: verified: second MCE"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerSecMce );
+ }
+
+ } while (0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+template<>
+uint32_t VcmEvent<TYPE_MBA>::rowRepairEndRank( STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #define PRDF_FUNC "[VcmEvent::rowRepairEndRank] "
+
+ PRDF_ASSERT( !iv_canResumeScrub );
+ PRDF_ASSERT( iv_rowRepairEnabled );
+ PRDF_ASSERT( 0 != iv_mceCount );
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // get dimm
+ uint8_t l_ps = iv_mark.getSymbol().getPortSlct();
+ TARGETING::TargetHandle_t l_dimm =
+ PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank,
+ l_ps );
+
+ // If scrub gets to the end of the master rank with an MCE
+ // Update VPD with row repair
+ o_rc = setRowRepairData<TYPE_MBA>( l_dimm, iv_rank,
+ iv_rowRepairFailAddr, iv_mark.getSymbol().getDram() );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "setRowRepairData(0x%08x, 0x%02x) "
+ "failed", PlatServices::getHuid(l_dimm),
+ iv_rank.getKey() );
+ break;
+ }
+
+ // Signature: "VCM: verified: common row fail"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerRowFail );
+
+ // VCM verified, VCM done
} while (0);
@@ -196,41 +355,64 @@ uint32_t VcmEvent<TYPE_MBA>::handlePhaseComplete( const uint32_t & i_eccAttns,
{
if ( i_eccAttns & MAINT_MCE )
{
- if ( iv_rowRepairEnabled )
+ iv_mceCount++;
+
+ // Only need to call verified on the first mce we hit
+ if ( 1 == iv_mceCount )
{
- o_rc = rowRepair( io_sc, o_done );
+ o_rc = verified( io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x",
+ PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x",
iv_chip->getHuid() );
break;
}
}
- else
+
+ if ( iv_rowRepairEnabled )
{
- o_rc = verified( io_sc );
+ o_rc = rowRepair( io_sc, o_done );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x",
+ PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x",
iv_chip->getHuid() );
break;
}
-
+ if ( o_done ) break;
+ }
+ else
+ {
o_done = true; // Procedure is complete.
+ break;
}
}
- else if ( !iv_canResumeScrub )
+
+ if ( !iv_canResumeScrub )
{
- // The chip mark is not verified and the command has reached the
- // end of the rank. So this is a false alarm.
- o_rc = falseAlarm( io_sc );
- if ( SUCCESS != o_rc )
+ // If row repair is enabled, we reached the end of the rank, and
+ // we got an MCE, we need to apply the row repair.
+ if ( iv_rowRepairEnabled && 0 != iv_mceCount )
{
- PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x",
- iv_chip->getHuid() );
- break;
+ o_rc = rowRepairEndRank( io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "rowRepairEndRank() failed on "
+ "0x%08x", iv_chip->getHuid() );
+ break;
+ }
+ }
+ else
+ {
+ // The chip mark is not verified and the command has reached
+ // the end of the rank. So this is a false alarm.
+ o_rc = falseAlarm( io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x",
+ iv_chip->getHuid() );
+ break;
+ }
}
-
o_done = true; // Procedure is complete.
}
}
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
index 0bf7cfbb9..92c8d3b54 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.H
@@ -29,9 +29,11 @@
#define __prdfMemVcm_H
// Platform includes
+#include <prdfErrlUtil.H>
#include <prdfMemDbUtils.H>
#include <prdfMemEccAnalysis.H>
#include <prdfMemMark.H>
+#include <prdfMemRowRepair.H>
#include <prdfMemScrubUtils.H>
#include <prdfMemTdFalseAlarm.H>
#include <prdfMemTdQueue.H>
@@ -315,11 +317,20 @@ class VcmEvent : public TdEntry
*/
uint32_t rowRepair( STEP_CODE_DATA_STRUCT & io_sc, bool & o_done );
+ /**
+ * @brief Do extra analysis needed for Row Repair once scrub has reached
+ * the end of the rank.
+ * @param io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ uint32_t rowRepairEndRank( STEP_CODE_DATA_STRUCT & io_sc );
+
private: // instance variables
const MemMark iv_mark; ///< The chip mark from hardware.
-
const bool iv_rowRepairEnabled; ///< True if Row Repair is enabled.
+ uint8_t iv_mceCount = 0; ///< MCEs hit count, currently for Row Repair only.
+ MemAddr iv_rowRepairFailAddr; ///< Address stored to apply Row Repair on.
};
//------------------------------------------------------------------------------
OpenPOWER on IntegriCloud