summaryrefslogtreecommitdiffstats
path: root/src/usr
diff options
context:
space:
mode:
author Zane Shelley <zshelle@us.ibm.com> 2017-05-05 22:05:06 -0500
committer Zane C. Shelley <zshelle@us.ibm.com> 2017-05-19 10:54:04 -0400
commit a12b4ce0769e07495726733c6d55a90358cf86bd (patch)
tree f127e5dfa0f87f2afc3508d408a79e1f1c4f544c /src/usr
parent 513e460747a3275fcbfd5deb585bfb2836f8fbc9 (diff)
downloadblackbird-hostboot-a12b4ce0769e07495726733c6d55a90358cf86bd.tar.gz
blackbird-hostboot-a12b4ce0769e07495726733c6d55a90358cf86bd.zip
PRD: generic function for IUE attention handling
Change-Id: I0ed418f3934aaceee0e3949ad91af45879f9004d
RTC: 173944
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/40423
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/40228
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C 174
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H 36
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9Mca.C 13
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C 6
4 files changed, 127 insertions, 102 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 1aea86c2d..7ad37bcca 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -153,19 +153,49 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
#ifdef __HOSTBOOT_MODULE
-uint32_t maskMemPort( ExtensibleChip * i_chip )
+template<>
+uint32_t maskMemPort<TYPE_MCA>( ExtensibleChip * i_chip )
{
- #define PRDF_FUNC "[MemEcc::maskMemPort] "
+ #define PRDF_FUNC "[MemEcc::maskMemPort<TYPE_MCA>] "
+ PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( TYPE_MCA == i_chip->getType() );
- SCAN_COMM_REGISTER_CLASS * c = i_chip->getRegister("MCACALFIR_MASK_OR");
- SCAN_COMM_REGISTER_CLASS * d = i_chip->getRegister("DDRPHYFIR_MASK_OR");
- SCAN_COMM_REGISTER_CLASS * e = i_chip->getRegister("MCAECCFIR_MASK_OR");
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // Mask all FIRs on the port.
+ SCAN_COMM_REGISTER_CLASS * c = i_chip->getRegister("MCACALFIR_MASK_OR");
+ SCAN_COMM_REGISTER_CLASS * d = i_chip->getRegister("DDRPHYFIR_MASK_OR");
+ SCAN_COMM_REGISTER_CLASS * e = i_chip->getRegister("MCAECCFIR_MASK_OR");
+
+ c->setAllBits(); d->setAllBits(); e->setAllBits();
+
+ o_rc = c->Write() | d->Write() | e->Write();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Write() failed on 0x%08x", i_chip->getHuid() );
+ break;
+ }
+
+ #ifdef __HOSTBOOT_RUNTIME
- c->setAllBits(); d->setAllBits(); e->setAllBits();
+ /* TODO RTC 136129
+ // Dynamically deallocate the port.
+ o_rc = MemDealloc::port<TYPE_MCA>( i_chip );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "MemDealloc::port<TYPE_MCA>(0x%08x) failed",
+ i_chip->getHuid() );
+ }
+ */
+
+ #endif
- return ( c->Write() | d->Write() | e->Write() );
+ } while (0);
+
+ return o_rc;
#undef PRDF_FUNC
}
@@ -176,10 +206,13 @@ uint32_t maskMemPort( ExtensibleChip * i_chip )
#ifdef __HOSTBOOT_RUNTIME
-uint32_t iuePortFail(ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc)
+template<>
+uint32_t iuePortFail<TYPE_MCA>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
{
- #define PRDF_FUNC "[MemEcc::iuePortFail] "
+ #define PRDF_FUNC "[MemEcc::iuePortFail<TYPE_MCA>] "
+ PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( TYPE_MCA == i_chip->getType() );
uint32_t o_rc = SUCCESS;
@@ -714,89 +747,68 @@ uint32_t analyzeFetchUe<TYPE_MCA, McaDataBundle *>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
-#ifdef __HOSTBOOT_MODULE
-
template<TARGETING::TYPE T, typename D>
-uint32_t __analyzeIue( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
- MemAddr i_addr )
+uint32_t handleMemIue( ExtensibleChip * i_chip, const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc )
{
- #define PRDF_FUNC "[MemEcc::__analyzeIue] "
+ #define PRDF_FUNC "[MemEcc::handleMemIue] "
+ PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( T == i_chip->getType() );
+
uint32_t o_rc = SUCCESS;
- do
- {
- // get data bundle from chip
- D db = static_cast<D>( i_chip->getDataBundle() );
+ // Add the DIMM to the callout list.
+ MemoryMru mm { i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm );
- // get the rank
- MemRank rank = i_addr.getRank();
+ #ifdef __HOSTBOOT_MODULE
- TargetHandle_t trgt = i_chip->getTrgt();
+ do
+ {
+ // Nothing else to do if handling a system checkstop.
+ if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() ) break;
- // Add the DIMM to the callout list
- MemoryMru memmru(trgt, rank, MemoryMruData::CALLOUT_RANK);
- io_sc.service_data->SetCallout( memmru );
+ // Get the data bundle from chip.
+ D db = static_cast<D>( i_chip->getDataBundle() );
- uint8_t ds = rank.getDimmSlct();
+ // Get the DIMM select.
+ uint8_t ds = i_rank.getDimmSlct();
- // Initialize threshold if it doesn't exist yet
+ // Initialize threshold if it doesn't exist yet.
if ( 0 == db->iv_iueTh.count(ds) )
{
db->iv_iueTh[ds] = TimeBasedThreshold( getIueTh() );
}
- // increment the threshold - check if at threshold
+ // Increment the count and check if at threshold.
if ( db->iv_iueTh[ds].inc(io_sc) )
{
- // Make the error log predictive
+ // Make the error log predictive.
io_sc.service_data->setServiceCall();
- #ifdef __HOSTBOOT_RUNTIME
-
- /* TODO RTC 136129
- // Dynamically deallocate the rank.
- uint32_t dealloc_rc = MemDealloc::rank<T>( i_chip, rank );
- if ( SUCCESS != dealloc_rc )
- {
- PRDF_ERR( PRDF_FUNC "MemDealloc::rank() failed: i_chip=0x%08x "
- "rank=m%ds%d", i_chip->getHuid(), rank.getMaster(),
- rank.getSlave() );
- o_rc = dealloc_rc; break;
- }
- */
-
- #endif // __HOSTBOOT_RUNTIME
+ // The port fail will be triggered in the PostAnalysis plugin after
+ // the error log has been committed.
- // mask off the entire port to avoid collateral
- o_rc = maskMemPort( i_chip );
+ // Mask off the entire port to avoid collateral.
+ o_rc = MemEcc::maskMemPort<T>( i_chip );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort failed: i_chip=0x%08x",
- i_chip->getHuid() );
+ PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort<T>(0x%08x) failed",
+ i_chip->getHuid() );
break;
}
-
- // Port fail will be triggered in PostAnalysis after the error log
- // has been committed.
}
- }while(0);
+ } while (0);
+
+ #endif // __HOSTBOOT_MODULE
return o_rc;
#undef PRDF_FUNC
}
-// To resolve template linker errors.
-template
-uint32_t __analyzeIue<TYPE_MCA, McaDataBundle*>(ExtensibleChip * i_chip,
- STEP_CODE_DATA_STRUCT & io_sc,
- MemAddr i_addr );
-
-#endif // __HOSTBOOT_MODULE
-
//------------------------------------------------------------------------------
template<TARGETING::TYPE T, typename D>
@@ -805,44 +817,39 @@ uint32_t analyzeMainlineIue( ExtensibleChip * i_chip,
{
#define PRDF_FUNC "[MemEcc::analyzeMainlineIue] "
+ PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( T == i_chip->getType() );
- uint32_t o_rc = SUCCESS;
- #ifdef __HOSTBOOT_MODULE
+ uint32_t o_rc = SUCCESS;
do
{
-
- // get the address of the failure
- MemAddr addr;
-
// Use the address in MBRCER. This address also traps IRCDs, but it is
// not likely that we will have two independent failure modes at the
// same time. So we just assume the address is correct.
+ MemAddr addr;
o_rc = getMemReadAddr<T>( i_chip, MemAddr::READ_RCE_ADDR, addr );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_RCE_ADDR) failed",
- i_chip->getHuid() );
+ i_chip->getHuid() );
break;
}
+ MemRank rank = addr.getRank();
- o_rc = __analyzeIue<T,D>( i_chip, io_sc, addr );
+ o_rc = handleMemIue<T,D>( i_chip, rank, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "__analyzeIue failed. Chip HUID: 0x%08x",
- i_chip->getHuid() );
+ PRDF_ERR( PRDF_FUNC "handleMemIue<T,D>(0x%08x,m%ds%d) failed",
+ i_chip->getHuid(), rank.getMaster(), rank.getSlave() );
break;
}
- }while(0);
-
- #endif
+ } while (0);
return o_rc;
#undef PRDF_FUNC
-
}
// To resolve template linker errors.
@@ -858,40 +865,37 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip,
{
#define PRDF_FUNC "[MemEcc::analyzeMaintIue] "
+ PRDF_ASSERT( nullptr != i_chip );
PRDF_ASSERT( T == i_chip->getType() );
- uint32_t o_rc = SUCCESS;
- #ifdef __HOSTBOOT_MODULE
+ uint32_t o_rc = SUCCESS;
do
{
+ // Use the current address in the MCBMCAT.
MemAddr addr;
-
- // Use the current address in the MCBMCAT
o_rc = getMemMaintAddr<T>( i_chip, addr );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
- i_chip->getHuid() );
+ i_chip->getHuid() );
break;
}
+ MemRank rank = addr.getRank();
- o_rc = __analyzeIue<T,D>( i_chip, io_sc, addr );
+ o_rc = handleMemIue<T,D>( i_chip, rank, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "__analyzeIue failed. Chip HUID: "
- "0x%08x", i_chip->getHuid() );
+ PRDF_ERR( PRDF_FUNC "handleMemIue<T,D>(0x%08x,m%ds%d) failed",
+ i_chip->getHuid(), rank.getMaster(), rank.getSlave() );
break;
}
- }while(0);
-
- #endif
+ } while (0);
return o_rc;
#undef PRDF_FUNC
-
}
// To resolve template linker errors.
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
index 330fb2525..37beecdaf 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
@@ -85,6 +85,27 @@ uint32_t handleMemUe( ExtensibleChip * i_chip, const MemAddr & i_addr,
UE_TABLE::Type i_type, STEP_CODE_DATA_STRUCT & io_sc );
/**
+ * @brief Does mainline and maintenance IUE handling.
+ *
+ * Adds the memory IUE to the callout list. At threshold, will make the error
+ * log predictive. When threshold is reached at runtime there is a good chance
+ * these IUEs are going to lead to a data integrity issue. Therefore, the port
+ * will be forced to fail, the entire port will be masked off, and dynamic
+ * memory deallocation will be applied. Note that this function will not issue
+ * the port failure because it is possible that it may crash the host. Instead,
+ * the port failure is issued in the PostAnalysis plugin after the error log has
+ * been committed.
+ *
+ * @param i_chip MCA chip.
+ * @param i_rank Rank containing the IUE.
+ * @param io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T, typename D>
+uint32_t handleMemIue( ExtensibleChip * i_chip, const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
+/**
* @brief Analyzes a fetch MPE attention.
* @param i_chip MCA or MBA.
* @param i_rank Target rank.
@@ -158,22 +179,25 @@ uint32_t analyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc );
#ifdef __HOSTBOOT_RUNTIME
/**
- * @brief Will trigger a port fail if the number of IUEs is over threshold
- * @param i_chip MCA chip
- * @param io_sc The step code data struct.
+ * @brief Will trigger a port fail if the number of IUEs is over threshold.
+ * @param i_chip MCA chip
+ * @param io_sc The step code data struct.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise
*/
-uint32_t iuePortFail(ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc);
+template<TARGETING::TYPE T>
+uint32_t iuePortFail( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc );
#endif // __HOSTBOOT_RUNTIME
#ifdef __HOSTBOOT_MODULE
/**
- * @brief Will mask off the entire mem port
- * @param i_chip MCA chip
+ * @brief Will mask off an entire memory port. At runtime will issue dynamic
+ * memory deallocation of the port.
+ * @param i_chip MCA chip
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise
*/
+template<TARGETING::TYPE T>
uint32_t maskMemPort( ExtensibleChip * i_chip );
template<TARGETING::TYPE T, typename D>
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 7016b06bd..9b54037ba 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -69,16 +69,14 @@ int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
#ifdef __HOSTBOOT_RUNTIME
-
// If the IUE threshold in our data bundle has been reached, we trigger
// a port fail. Once we trigger the port fail, the system may crash
// right away. Since PRD is running in the hypervisor, it is possible we
// may not get the error log. To better our chances, we trigger the port
// fail here after the error log has been committed.
- if ( SUCCESS != MemEcc::iuePortFail(i_chip, io_sc) )
+ if ( SUCCESS != MemEcc::iuePortFail<TYPE_MCA>(i_chip, io_sc) )
{
- PRDF_ERR( PRDF_FUNC "iuePortFail failed: i_chip=0x%08x",
- i_chip->getHuid() );
+ PRDF_ERR( PRDF_FUNC "iuePortFail(0x%08x) failed", i_chip->getHuid() );
}
#endif // __HOSTBOOT_RUNTIME
@@ -197,14 +195,13 @@ int32_t MemPortFailure( ExtensibleChip * i_chip,
if ( CHECK_STOP != io_sc.service_data->getPrimaryAttnType() )
{
- // The port is dead mask off the entire port.
- uint32_t l_rc = MemEcc::maskMemPort( i_chip );
+ // The port is dead. Mask off the entire port.
+ uint32_t l_rc = MemEcc::maskMemPort<TYPE_MCA>( i_chip );
if ( SUCCESS != l_rc )
{
- PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort failed: i_chip=0x%08x",
+ PRDF_ERR( PRDF_FUNC "MemEcc::maskMemPort<TYPE_MCA>(0x%08x) failed",
i_chip->getHuid() );
}
-
}
return SUCCESS; // nothing to return to rule code
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C b/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C
index 4a80c2203..1b017194c 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mcbist.C
@@ -112,10 +112,10 @@ int32_t PostAnalysis( ExtensibleChip * i_mcbChip,
// if there's an IUE and we've reached threshold trigger a port fail
if ( eccAttns & MAINT_IUE )
{
- if ( SUCCESS != MemEcc::iuePortFail(mca, io_sc) )
+ if ( SUCCESS != MemEcc::iuePortFail<TYPE_MCA>(mca, io_sc) )
{
- PRDF_ERR( PRDF_FUNC "iuePortFail failed: i_mcbChip="
- "0x%08x", i_mcbChip->getHuid() );
+ PRDF_ERR( PRDF_FUNC "iuePortFail(0x%08x) failed",
+ i_mcbChip->getHuid() );
}
}
}
OpenPOWER on IntegriCloud