summary refs log tree commit diff stats
path: root/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
diff options
context:
space:
mode:
author Caleb Palmer <cnpalmer@us.ibm.com> 2018-07-11 08:29:41 -0500
committer Zane C. Shelley <zshelle@us.ibm.com> 2018-08-23 10:19:18 -0500
commit 4dee8a0a654536130b4886c114ba7fe1d23bcee0 (patch)
tree 669d5607d6fbe320df1f2383d63b041b66829f08 /src/usr/diag/prdf/plat/mem/prdfMemVcm.C
parent 0a6c8e400c83c63057cede1446a5e63ba35606e3 (diff)
download talos-hostboot-4dee8a0a654536130b4886c114ba7fe1d23bcee0.tar.gz
talos-hostboot-4dee8a0a654536130b4886c114ba7fe1d23bcee0.zip
PRD: Row Repair VCM Updates
Change-Id: I809cdc7fc2a010c5cb421bf5410d43f9bb690fee
RTC: 196073
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/63375
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/64982
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf/plat/mem/prdfMemVcm.C')
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemVcm.C 214
1 files changed, 198 insertions, 16 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
index e0feeb362..5009b6aa6 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm.C
@@ -112,10 +112,169 @@ uint32_t VcmEvent<TYPE_MBA>::rowRepair( STEP_CODE_DATA_STRUCT & io_sc,
{
#define PRDF_FUNC "[VcmEvent::rowRepair] "
+ PRDF_ASSERT( iv_rowRepairEnabled )
+
uint32_t o_rc = SUCCESS;
do
{
+ // get port select
+ uint8_t l_ps = iv_mark.getSymbol().getPortSlct();
+
+ // get if the spares are available
+ bool l_spAvail, l_eccAvail;
+ o_rc = PlatServices::isSpareAvailable<TYPE_MBA>( iv_chip->getTrgt(),
+ iv_rank, l_ps, l_spAvail, l_eccAvail );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "isChipMarkOnSpare(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+
+ // get dimm
+ TARGETING::TargetHandle_t l_dimm =
+ PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank,
+ l_ps );
+
+ // If scrub stops on first MCE, and static row repair
+ // not supported or both spare and chip mark used
+ if ( 1 == iv_mceCount && ( !l_spAvail && !l_eccAvail ) )
+ {
+ // Record bad DQs in VPD - done when verified()
+ // No need to continue scrubbing, VCM verified, VCM done.
+ o_done = true;
+ }
+ // Else if scrub stops on first MCE and static row repair
+ // supported
+ else if ( 1 == iv_mceCount )
+ {
+ MemRowRepair l_rowRepair;
+ o_rc = getRowRepairData<TYPE_MBA>( l_dimm, iv_rank, l_rowRepair );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getRowRepairData(0x%08x, 0x%02x)",
+ PlatServices::getHuid(l_dimm), iv_rank.getKey() );
+ break;
+ }
+
+ // If the port, dimm, master rank has previous row repair in VPD
+ if ( l_rowRepair.isValid() )
+ {
+ // If previous repair for same DRAM
+ if ( l_rowRepair.getRowRepairDram() ==
+ iv_mark.getSymbol().getDram() )
+ {
+ // Clear previous row repair from VPD
+ o_rc = clearRowRepairData<TYPE_MBA>( l_dimm, iv_rank );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearRowRepairData"
+ "(0x%08x, 0x%02x) failed",
+ PlatServices::getHuid(l_dimm),
+ iv_rank.getKey() );
+ break;
+ }
+
+ // Record bad DQs in VPD - done when verified()
+ // Signature: "VCM: verified: previous PPR on same DRAM"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerSameDram );
+
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+ }
+ // Else if previous repair for different DRAM
+ else
+ {
+ // Leave previous row repair in VPD
+ // Record bad DQs in VPD - done when verified()
+ // Signature:"VCM: verified: previous PPR on
+ // different DRAM"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerDiffDram );
+
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+ }
+ }
+ // Else if no previous row repair
+ else
+ {
+ // Signature: "VCM: verified: first MCE"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerFirstMce );
+
+ // Record bad DQs in VPD - done when verified()
+ // Remember address
+ MemAddr l_addr;
+ o_rc = getMemMaintAddr<TYPE_MBA>( iv_chip,
+ iv_rowRepairFailAddr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ iv_chip->getHuid() );
+ break;
+ }
+
+ // Continue scrub, don't set procedure to done
+ }
+ }
+ // Else if scrub stops on second MCE
+ else if ( iv_mceCount > 1 )
+ {
+ // Since at least 2 bad rows, don't bother with row repair
+ // No need to continue scrubbing, VCM verified, VCM done
+ o_done = true;
+
+ // Signature: "VCM: verified: second MCE"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerSecMce );
+ }
+
+ } while (0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+template<>
+uint32_t VcmEvent<TYPE_MBA>::rowRepairEndRank( STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #define PRDF_FUNC "[VcmEvent::rowRepairEndRank] "
+
+ PRDF_ASSERT( !iv_canResumeScrub );
+ PRDF_ASSERT( iv_rowRepairEnabled );
+ PRDF_ASSERT( 0 != iv_mceCount );
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // get dimm
+ uint8_t l_ps = iv_mark.getSymbol().getPortSlct();
+ TARGETING::TargetHandle_t l_dimm =
+ PlatServices::getConnectedDimm( iv_chip->getTrgt(), iv_rank,
+ l_ps );
+
+ // If scrub gets to the end of the master rank with an MCE
+ // Update VPD with row repair
+ o_rc = setRowRepairData<TYPE_MBA>( l_dimm, iv_rank,
+ iv_rowRepairFailAddr, iv_mark.getSymbol().getDram() );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "setRowRepairData(0x%08x, 0x%02x) "
+ "failed", PlatServices::getHuid(l_dimm),
+ iv_rank.getKey() );
+ break;
+ }
+
+ // Signature: "VCM: verified: common row fail"
+ io_sc.service_data->setSignature( iv_chip->getHuid(),
+ PRDFSIG_VcmVerRowFail );
+
+ // VCM verified, VCM done
} while (0);
@@ -196,41 +355,64 @@ uint32_t VcmEvent<TYPE_MBA>::handlePhaseComplete( const uint32_t & i_eccAttns,
{
if ( i_eccAttns & MAINT_MCE )
{
- if ( iv_rowRepairEnabled )
+ iv_mceCount++;
+
+ // Only need to call verified on the first mce we hit
+ if ( 1 == iv_mceCount )
{
- o_rc = rowRepair( io_sc, o_done );
+ o_rc = verified( io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x",
+ PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x",
iv_chip->getHuid() );
break;
}
}
- else
+
+ if ( iv_rowRepairEnabled )
{
- o_rc = verified( io_sc );
+ o_rc = rowRepair( io_sc, o_done );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "verified() failed on 0x%08x",
+ PRDF_ERR( PRDF_FUNC "rowRepair() failed on 0x%08x",
iv_chip->getHuid() );
break;
}
-
+ if ( o_done ) break;
+ }
+ else
+ {
o_done = true; // Procedure is complete.
+ break;
}
}
- else if ( !iv_canResumeScrub )
+
+ if ( !iv_canResumeScrub )
{
- // The chip mark is not verified and the command has reached the
- // end of the rank. So this is a false alarm.
- o_rc = falseAlarm( io_sc );
- if ( SUCCESS != o_rc )
+ // If row repair is enabled, we reached the end of the rank, and
+ // we got an MCE, we need to apply the row repair.
+ if ( iv_rowRepairEnabled && 0 != iv_mceCount )
{
- PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x",
- iv_chip->getHuid() );
- break;
+ o_rc = rowRepairEndRank( io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "rowRepairEndRank() failed on "
+ "0x%08x", iv_chip->getHuid() );
+ break;
+ }
+ }
+ else
+ {
+ // The chip mark is not verified and the command has reached
+ // the end of the rank. So this is a false alarm.
+ o_rc = falseAlarm( io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "falseAlarm() failed on 0x%08x",
+ iv_chip->getHuid() );
+ break;
+ }
}
-
o_done = true; // Procedure is complete.
}
}
OpenPOWER on IntegriCloud