summary | refs | log | tree | commit | diff | stats
path: root/src/usr/diag
diff options
context:
space:
mode:
author: Zane Shelley <zshelle@us.ibm.com> 2017-04-28 17:06:14 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2017-05-03 10:43:14 -0400
commit: 4c2df3a7280e94e7bee4b00eb40c7b476a2722d8 (patch)
tree: ca3c56088b266b415406f57e787a004d7c139a40 /src/usr/diag
parent: 4ac944be420f4d5e1635aae520674bb6c5e4582f (diff)
download: blackbird-hostboot-4c2df3a7280e94e7bee4b00eb40c7b476a2722d8.tar.gz
download: blackbird-hostboot-4c2df3a7280e94e7bee4b00eb40c7b476a2722d8.zip
PRD: Updates to AUE/IAUE handling
Change-Id: I7d403cb29bbeb2d5f383a38816b579e71fc3dc0d
RTC: 173491
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39851
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39967
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C | 5
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H | 4
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C | 63
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 8
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 33
-rw-r--r-- src/usr/diag/prdf/plat/prdfPlatServices.C | 13
6 files changed, 110 insertions, 16 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
index e0bcd5bb6..29b9815d8 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C
@@ -233,8 +233,6 @@ uint32_t getMemReadAddr<TYPE_MBA>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
-#ifdef __HOSTBOOT_MODULE
-
template<>
uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip,
MemAddr & o_addr )
@@ -286,6 +284,7 @@ uint32_t getMemMaintAddr<TYPE_MCA>( ExtensibleChip * i_chip, MemAddr & o_addr )
}
//------------------------------------------------------------------------------
+
template<>
uint32_t getMemMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr & o_addr )
{
@@ -319,6 +318,8 @@ uint32_t getMemMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr & o_addr )
//------------------------------------------------------------------------------
+#ifdef __HOSTBOOT_MODULE
+
uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip,
std::vector<ExtensibleChip *> & o_mcaList )
{
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
index fcbe33af9..07808a990 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H
@@ -172,8 +172,6 @@ template<TARGETING::TYPE T>
uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg,
MemAddr & o_addr );
-#ifdef __HOSTBOOT_MODULE
-
/**
* @brief Reads the maintenance address from hardware.
*
@@ -194,6 +192,8 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg,
template<TARGETING::TYPE T>
uint32_t getMemMaintAddr( ExtensibleChip * i_chip, MemAddr & o_addr );
+#ifdef __HOSTBOOT_MODULE
+
/**
* @brief Queries broadcast mode information and determines which of the MCBIST
* ports were targeted for the command.
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
index cac225d23..22dd60554 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
@@ -272,6 +272,69 @@ PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeImpe );
//------------------------------------------------------------------------------
+/**
+ * @brief MCAECCFIR[13,16] - Mainline AUE/IAUE: call out rank of AUE read addr.
+ * @param i_chip MCA chip.
+ * @param io_sc The step code data struct. Gets the rank callout (MRU_HIGH).
+ * @return SUCCESS always, even if the AUE read address could not be obtained.
+ */
+int32_t AnalyzeFetchAueIaue( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #define PRDF_FUNC "[p9_mca::AnalyzeFetchAueIaue] "
+
+ MemAddr addr;
+ if ( SUCCESS != getMemReadAddr<TYPE_MCA>(i_chip, MemAddr::READ_AUE_ADDR,
+ addr) )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x,READ_AUE_ADDR) failed",
+ i_chip->getHuid() );
+ }
+ else
+ {
+ MemRank rank = addr.getRank();
+ MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm, MRU_HIGH );
+ }
+
+ return SUCCESS; // nothing to return to rule code; no rank callout on failure
+
+ #undef PRDF_FUNC
+}
+PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeFetchAueIaue );
+
+/**
+ * @brief MCAECCFIR[33] - Maintenance AUE: call out rank of the maint address.
+ * @param i_chip MCA chip.
+ * @param io_sc The step code data struct. Gets the rank callout (MRU_HIGH).
+ * @return SUCCESS always, even if the maintenance address read fails.
+ */
+int32_t AnalyzeMaintAue( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #define PRDF_FUNC "[p9_mca::AnalyzeMaintAue] "
+
+ MemAddr addr;
+ if ( SUCCESS != getMemMaintAddr<TYPE_MCA>(i_chip, addr) )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ i_chip->getHuid() );
+ }
+ else
+ {
+ MemRank rank = addr.getRank();
+ MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm, MRU_HIGH );
+ }
+
+ return SUCCESS; // nothing to return to rule code; no rank callout on failure
+
+ #undef PRDF_FUNC
+}
+PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeMaintAue );
+
+//------------------------------------------------------------------------------
+
} // end namespace p9_mca
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
index cda13518f..2f62b13ac 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
@@ -407,7 +407,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[13]
* Mainline read AUE
*/
- (rMCAECCFIR, bit(13)) ? all_dimm_H_th_1;
+ (rMCAECCFIR, bit(13)) ? mainline_aue_iaue_handling;
/** MCAECCFIR[14]
* Mainline read UE
@@ -422,7 +422,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[16]
* Mainline read IAUE
*/
- (rMCAECCFIR, bit(16)) ? all_dimm_H_th_1;
+ (rMCAECCFIR, bit(16)) ? mainline_aue_iaue_handling;
/** MCAECCFIR[17]
* Mainline read IUE
@@ -472,7 +472,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[33]
* Maintenance AUE
*/
- (rMCAECCFIR, bit(33)) ? all_dimm_H_th_1;
+ (rMCAECCFIR, bit(33)) ? maintenance_aue_handling;
/** MCAECCFIR[34]
* Maintenance UE
@@ -487,7 +487,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 )
/** MCAECCFIR[36]
* Maintenance IAUE
*/
- (rMCAECCFIR, bit(36)) ? all_dimm_H_th_1;
+ (rMCAECCFIR, bit(36)) ? maintenance_iaue_handling;
/** MCAECCFIR[37]
* Maintenance IUE
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
index 95d591c49..a18372c0f 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
@@ -23,14 +23,6 @@
#
# IBM_PROLOG_END_TAG
-/** Callout all connected DIMMs HIGH on first occurence. */
-actionclass all_dimm_H_th_1
-{
- callout(connected(TYPE_DIMM,0), MRU_HIGH);
- callout(connected(TYPE_DIMM,1), MRU_HIGH);
- threshold1;
-};
-
/** Verify Chip Mark */
actionclass verify_chip_mark_0 { funccall("AnalyzeFetchMpe_0"); };
actionclass verify_chip_mark_1 { funccall("AnalyzeFetchMpe_1"); };
@@ -95,6 +87,31 @@ actionclass maintenance_iue_handling
actionclass impe_handling { funccall("AnalyzeImpe"); };
+/** Handle Mainline AUEs/IAUEs (MCAECCFIR[13,16]); plugin calls out the rank */
+actionclass mainline_aue_iaue_handling
+{
+ funccall("AnalyzeFetchAueIaue");
+ calloutSelfLow;
+ threshold1;
+};
+
+/** Handle Maintenance AUEs (MCAECCFIR[33]); plugin calls out the rank */
+actionclass maintenance_aue_handling
+{
+ funccall("AnalyzeMaintAue");
+ calloutSelfLow;
+ threshold1;
+};
+
+/** Handle Maintenance IAUEs (MCAECCFIR[36]); no plugin, both DIMMs HIGH */
+actionclass maintenance_iaue_handling
+{
+ callout(connected(TYPE_DIMM,0), MRU_HIGH);
+ callout(connected(TYPE_DIMM,1), MRU_HIGH);
+ calloutSelfLow;
+ threshold1;
+};
+
/** MCA/UE algroithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
{
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index 59600c116..6e0c3878b 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -314,6 +314,19 @@ uint32_t startBgScrub<TYPE_MCA>( ExtensibleChip * i_mcaChip,
// background scrubbing never stops.
mss::mcbist::stop_conditions stopCond;
+ // AUEs are checkstop attentions. Unfortunately, MCBIST commands do not stop
+ // when the system checkstops. Therefore, we must set the stop condition for
+ // AUEs so that we can use the MCBMCAT register to determine where the error
+ // occurred. Note that there isn't a stop condition specifically for IAUEs.
+ // Instead, there is the RCE threshold. Unfortunately, the RCE counter is a
+ // combination of IUE, IAUE, IMPE, and IRCD errors. It is possible to use
+ // this threshold and simply restart background scrubbing each time there is
+ // an IUE, IMPE, or IRCD but there is concern that PRD might get stuck
+ // handling those attentions on every address even after thresholds have
+ // been reached. Therefore, we simplified the design and will simply call
+ // out both DIMMs for maintenance IAUEs.
+ stopCond.set_pause_on_aue(mss::ON);
+
#ifdef CONFIG_HBRT_PRD
stopCond.set_thresh_nce_int(1)
OpenPOWER on IntegriCloud