summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCaleb Palmer <cnpalmer@us.ibm.com>2018-04-23 08:55:42 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2018-05-18 10:39:40 -0400
commit41a25f11016a7847565e323f42615e460354afa4 (patch)
treec83727db6669e6f9a52bec739db452a90bcf6165
parent1b04e458595a9e9c5c04dd322f90d4c44129e111 (diff)
downloadtalos-hostboot-41a25f11016a7847565e323f42615e460354afa4.tar.gz
talos-hostboot-41a25f11016a7847565e323f42615e460354afa4.zip
PRD: Resume maint cmd support for MBA
Change-Id: I77b56983eba633104f8b15d6b608cb490c5be48d
RTC: 191647
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57918
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59013
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C11
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H12
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C140
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H9
-rw-r--r--src/usr/diag/prdf/plat/prdfPlatServices_rt.C158
5 files changed, 309 insertions, 21 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
index 5893d6dc9..de44ed87e 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
@@ -103,6 +103,17 @@ MemAddr MemAddr::fromMaintAddr<TYPE_MBA>( uint64_t i_addr )
return MemAddr( MemRank(mrnk, srnk), bnk, row, col );
}
+template<>
+uint64_t MemAddr::toMaintAddr<TYPE_MBA>() const // Packs rank, bank, row and column into one 64-bit maintenance address; no other bits are set.
+{
+ return ( ((uint64_t) iv_rnk.getMaster() << 60) | // master rank, least significant bit placed at bit 60 (LSB-0 numbering)
+ ((uint64_t) iv_rnk.getSlave() << 57) | // slave rank, least significant bit placed at bit 57
+ ((uint64_t) iv_bnk << 53) | // bank, least significant bit placed at bit 53
+ ((uint64_t)(iv_row & 0x1ffff) << 36) | // r16-r0: the low 17 row bits, placed starting at bit 36
+ ((uint64_t) iv_col << 24) | // column, least significant bit placed at bit 24
+ ((uint64_t)(iv_row & 0x20000) << 13) ); // r17: row bit 17 is stored apart from r16-r0 and lands on bit 30
+}
+
//------------------------------------------------------------------------------
// Address Accessor Functions
//------------------------------------------------------------------------------
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
index 4b1fc07fb..b0b86af04 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
@@ -87,12 +87,22 @@ class MemAddr
static MemAddr fromReadAddr( uint64_t i_addr );
/**
- * @brief Creates a MemAddr from a maintenance address.
+ * @brief Creates a MemAddr from the current maintenance address.
* @param i_addr 64-bit address.
*/
template<TARGETING::TYPE T>
static MemAddr fromMaintAddr( uint64_t i_addr );
+ /**
+ * @brief Converts internal data structure to a maintenance address.
+ * @return A uint64_t version of the address.
+ * @note Does not include error type. This is because in most cases we
+ * will use this function to write out to hardware and in doing so
+ * we will want to clear the status bits anyway.
+ */
+ template<TARGETING::TYPE T>
+ uint64_t toMaintAddr() const;
+
/** @return This address's rank. */
const MemRank& getRank() const { return iv_rnk; }
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
index 5b7e72c03..43fd84545 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
@@ -386,6 +386,146 @@ uint32_t checkEccFirs<TYPE_MBA>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
template<>
+uint32_t conditionallyClearEccCounters<TYPE_MBA>( ExtensibleChip * i_chip ) // Zeroes only the maintenance ECC counters whose attention is currently reported by checkEccFirs().
+{
+ #define PRDF_FUNC "[conditionallyClearEccCounters] "
+
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
+
+ uint32_t o_rc = SUCCESS; // returned as-is: SUCCESS, or the return code of the first failing step
+
+ do
+ {
+ // Check for maintenance ECC errors.
+ uint32_t eccAttns = 0; // handed to checkEccFirs() and then tested against the MAINT_*_ETE flags below
+ o_rc = checkEccFirs<TYPE_MBA>( i_chip, eccAttns );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "checkEccFirs<TYPE_MBA>(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ ExtensibleChip * membChip = getConnectedParent( i_chip, TYPE_MEMBUF ); // every register below is fetched from this parent chip, not from i_chip
+ uint8_t mbaPos = i_chip->getPos(); // selects between the MBA0_* and MBA1_* register names
+
+ const char * ec0Reg_str = (0 == mbaPos) ? "MBA0_MBSEC0" : "MBA1_MBSEC0";
+ SCAN_COMM_REGISTER_CLASS * ec0Reg = membChip->getRegister( ec0Reg_str );
+ o_rc = ec0Reg->Read(); // read up front so fields that are not cleared keep their current values on write-back
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on %s", ec0Reg_str );
+ break;
+ }
+
+ const char * mbstr_str = (0 == mbaPos) ? "MBSTR_0" : "MBSTR_1";
+ SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( mbstr_str );
+ o_rc = mbstr->Read(); // only inspected below (bits 55-57); never written in this function
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on %s", mbstr_str );
+ break;
+ }
+
+ bool updateEc0 = false; // set once any MBSEC0 field has been zeroed below
+ bool clearSymCntrs = false; // set when the MBSSYMECx registers must be cleared as well
+
+ if ( eccAttns & MAINT_SOFT_NCE_ETE )
+ {
+ // Clear Soft CE total count.
+ ec0Reg->SetBitFieldJustified( 0, 12, 0 ); // NOTE(review): arguments look like (start bit, length, value) - confirm
+ updateEc0 = true;
+
+ if ( mbstr->IsBitSet(55) ) clearSymCntrs = true; // NOTE(review): presumably MBSTR bit 55 enables per-symbol counting of soft CEs - confirm
+ }
+
+ if ( eccAttns & MAINT_INT_NCE_ETE )
+ {
+ // Clear Intermittent CE total count.
+ ec0Reg->SetBitFieldJustified( 12, 12, 0 );
+ updateEc0 = true;
+
+ if ( mbstr->IsBitSet(56) ) clearSymCntrs = true; // NOTE(review): presumably the intermittent CE counterpart of bit 55 - confirm
+ }
+
+ if ( eccAttns & MAINT_HARD_NCE_ETE )
+ {
+ // Clear the hard CE total count.
+ ec0Reg->SetBitFieldJustified( 24, 12, 0 );
+ updateEc0 = true;
+
+ if ( mbstr->IsBitSet(57) ) clearSymCntrs = true; // NOTE(review): presumably the hard CE counterpart of bit 55 - confirm
+ }
+
+ if ( updateEc0 ) // a single write covers every field zeroed above; skipped when nothing changed
+ {
+ o_rc = ec0Reg->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Write() failed on %s", ec0Reg_str );
+ break;
+ }
+ }
+
+ if ( clearSymCntrs )
+ {
+ // Clear all of the per symbol counters. Note that there are a total
+ // of 9 MBSSYMECx registers (MBSSYMEC0-MBSSYMEC8) per MBA.
+ for ( uint8_t i = 0; i < 9; i++ )
+ {
+ char reg_str[20]; // generated names such as "MBA1_MBSSYMEC8" fit with room for the terminating NUL
+ snprintf( reg_str, 20, "MBA%d_MBSSYMEC%d", mbaPos, i );
+
+ SCAN_COMM_REGISTER_CLASS * reg = membChip->getRegister(reg_str);
+
+ reg->clearAllBits(); // no Read() first: the whole register is zeroed
+
+ o_rc = reg->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
+ break; // leaves only the for loop; remaining MBSSYMECx registers are not written
+ }
+ }
+ if ( SUCCESS != o_rc ) break; // carry an inner-loop failure out of the do-while
+ }
+
+ if ( eccAttns & MAINT_RCE_ETE )
+ {
+ // Clear only the RCE total count.
+ const char * ec1Reg_str =
+ (0 == mbaPos) ? "MBA0_MBSEC1" : "MBA1_MBSEC1";
+ SCAN_COMM_REGISTER_CLASS * ec1Reg =
+ membChip->getRegister( ec1Reg_str );
+
+ o_rc = ec1Reg->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on %s", ec1Reg_str );
+ break;
+ }
+
+ ec1Reg->SetBitFieldJustified( 0, 12, 0 ); // only this field is changed; the rest of MBSEC1 is written back as just read
+
+ o_rc = ec1Reg->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Write() failed on %s", ec1Reg_str );
+ break;
+ }
+ }
+
+ } while(0); // single-pass block: every break above falls through to the common return
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
+template<>
uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank )
{
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
index 72b52cbcc..bfa4e6a87 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
@@ -66,6 +66,15 @@ template<TARGETING::TYPE T>
uint32_t clearEccCounters( ExtensibleChip * i_chip );
/**
+ * @brief Calls checkEccFirs() and clears the maintenance ECC counters based on
+ * the active error types.
+ * @param i_chip MBA.
+ * @return Non-SUCCESS on SCOM failures, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t conditionallyClearEccCounters( ExtensibleChip * i_chip );
+
+/**
* @brief Clears the maintenance ECC FIRs.
* @param i_chip MBA, MCA, or MCBIST.
* @return Non-SUCCESS on SCOM failures, SUCCESS otherwise.
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
index 52ca3ef46..d0ad9b2f6 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
@@ -34,6 +34,7 @@
// Framework includes
#include <prdfErrlUtil.H>
#include <prdfTrace.H>
+#include <prdfRegisterCache.H>
// Platform includes
#include <prdfCenMbaDataBundle.H>
@@ -215,29 +216,28 @@ uint32_t stopBgScrub<TYPE_MBA>( ExtensibleChip * i_chip )
PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
- uint32_t rc = SUCCESS;
+ uint32_t o_rc = SUCCESS;
+
+ fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() );
+ errlHndl_t errl = nullptr;
- PRDF_ERR( PRDF_FUNC "function not implemented yet" );
-/* TODO RTC 157888
// It is safe to create a dummy command object because runtime commands do
// not store anything for cleanupCmd() and the stopCmd() function is generic
// for all command types. Also, since we are only stopping the command, all
// of the parameters for the command object are junk except for the target.
- ecmdDataBufferBase i_startAddr, i_endAddr;
- mss_TimeBaseScrub cmd { getFapiTarget(i_trgt), i_startAddr, i_endAddr,
+ fapi2::buffer<uint64_t> startAddr, endAddr;
+ mss_TimeBaseScrub cmd { fapiTrgt, startAddr, endAddr,
mss_MaintCmd::FAST_MAX_BW_IMPACT, 0, false };
-
- errlHndl_t errl = fapi::fapiRcToErrl( cmd.stopCmd() );
+ FAPI_INVOKE_HWP( errl, cmd.stopCmd );
if ( nullptr != errl )
{
PRDF_ERR( PRDF_FUNC "mss_TimeBaseScrub::stop(0x%08x) failed",
- getHuid(i_trgt) );
+ i_chip->getHuid() );
PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
- rc = FAIL;
+ o_rc = FAIL;
}
-*/
- return rc;
+ return o_rc;
#undef PRDF_FUNC
}
@@ -262,6 +262,14 @@ uint32_t __resumeScrub<TYPE_MBA>( ExtensibleChip * i_chip,
uint32_t o_rc = SUCCESS;
+ // Make sure there is a command complete attention when the command stops.
+ i_stopCond |= mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION;
+
+ // Make sure the command stops immediately on error or on the end address if
+ // there are no errors.
+ i_stopCond |= mss_MaintCmd::STOP_IMMEDIATE;
+ i_stopCond |= mss_MaintCmd::STOP_ON_END_ADDRESS;
+
if ( getMbaDataBundle(i_chip)->iv_scrubResumeCounter.atTh() )
{
// We have resumed scrubbing on this rank too many times. We still want
@@ -277,11 +285,108 @@ uint32_t __resumeScrub<TYPE_MBA>( ExtensibleChip * i_chip,
i_stopCond &= ~mss_MaintCmd::STOP_ON_UE;
}
+ fapi2::Target<fapi2::TARGET_TYPE_MBA> fapiTrgt ( i_chip->getTrgt() );
+ errlHndl_t errl = nullptr;
+
do
{
- // TODO: Clear ECC counters/FIRs. Increment the current address. Clear
- // FIRs again. Start the command from the current address to the
- // end of the rank.
+ // Manually clear the CE counters based on the error type and clear the
+ // maintenance FIRs. Note that we only want to clear counters that are
+ // at attention to allow the other CE types the opportunity to reach
+ // threshold, if possible.
+ o_rc = conditionallyClearEccCounters<TYPE_MBA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "conditionallyClearEccCounters(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ o_rc = clearEccFirs<TYPE_MBA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ // Increment the current maintenance address.
+ mss_IncrementAddress incCmd { fapiTrgt };
+ FAPI_INVOKE_HWP( errl, incCmd.setupAndExecuteCmd );
+ if ( nullptr != errl )
+ {
+ PRDF_ERR( PRDF_FUNC "mss_IncrementAddress setupAndExecuteCmd() on "
+ "0x%08x failed", i_chip->getHuid() );
+ PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+ o_rc = FAIL;
+ break;
+ }
+
+ // Clear the maintenance FIRs again. This time do not clear the CE
+ // counters.
+ o_rc = clearEccFirs<TYPE_MBA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearEccFirs(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ o_rc = clearCmdCompleteAttn<TYPE_MBA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "clearCmdCompleteAttn(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ // The address register has been updated so we need to clear our cache
+ // to ensure we can do a new read.
+ SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MBMACA" );
+ RegDataCache::getCachedRegisters().flush( i_chip, reg );
+
+ // Read the new start address from hardware.
+ MemAddr addr;
+ o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, addr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+ fapi2::buffer<uint64_t> saddr = addr.toMaintAddr<TYPE_MBA>();
+
+ // Get the end address of the current rank.
+ fapi2::buffer<uint64_t> eaddr, junk;
+ MemRank rank = addr.getRank();
+ o_rc = getMemAddrRange<TYPE_MBA>( i_chip, rank, junk, eaddr,
+ i_rangeType );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%2x) failed",
+ i_chip->getHuid(), rank.getKey() );
+ break;
+ }
+
+ // Resume the scrub command.
+ mss_TimeBaseScrub scrubCmd { fapiTrgt, saddr, eaddr, i_cmdSpeed,
+ i_stopCond, false };
+ FAPI_INVOKE_HWP( errl, scrubCmd.setupAndExecuteCmd );
+ if ( nullptr != errl )
+ {
+ PRDF_ERR( PRDF_FUNC "setupAndExecuteCmd() on 0x%08x,0x%02x failed",
+ i_chip->getHuid(), rank.getKey() );
+ PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+ o_rc = FAIL; break;
+ }
// Resume successful. So increment the resume counter.
getMbaDataBundle(i_chip)->iv_scrubResumeCounter.inc();
@@ -301,10 +406,22 @@ uint32_t resumeBgScrub<TYPE_MBA>( ExtensibleChip * i_chip )
PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
- /* TODO:
+ uint32_t stopCond = mss_MaintCmd::STOP_ON_HARD_NCE_ETE |
+ mss_MaintCmd::STOP_ON_INT_NCE_ETE |
+ mss_MaintCmd::STOP_ON_SOFT_NCE_ETE |
+ mss_MaintCmd::STOP_ON_RETRY_CE_ETE |
+ mss_MaintCmd::STOP_ON_MPE |
+ mss_MaintCmd::STOP_ON_UE;
+
+ mss_MaintCmd::TimeBaseSpeed cmdSpeed = enableFastBgScrub()
+ ? mss_MaintCmd::FAST_MED_BW_IMPACT
+ : mss_MaintCmd::BG_SCRUB;
+
+ // Because of the Centaur workarounds, we have to limit the number of times
+ // a command has been resumed on a rank. Therefore, we must always resume
+ // the command to the end of the current slave rank.
+
return __resumeScrub<TYPE_MBA>( i_chip, SLAVE_RANK, stopCond, cmdSpeed );
- */
- return SUCCESS;
}
//------------------------------------------------------------------------------
@@ -317,10 +434,11 @@ uint32_t resumeTdScrub<TYPE_MBA>( ExtensibleChip * i_chip,
PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
- /* TODO:
+ mss_MaintCmd::TimeBaseSpeed cmdSpeed = enableFastBgScrub()
+ ? mss_MaintCmd::FAST_MAX_BW_IMPACT
+ : mss_MaintCmd::FAST_MIN_BW_IMPACT;
+
return __resumeScrub<TYPE_MBA>( i_chip, i_rangeType, i_stopCond, cmdSpeed );
- */
- return SUCCESS;
}
//##############################################################################
OpenPOWER on IntegriCloud