diff options
| author | Zane Shelley <zshelle@us.ibm.com> | 2017-04-28 17:06:14 -0500 |
|---|---|---|
| committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-05-03 10:43:14 -0400 |
| commit | 4c2df3a7280e94e7bee4b00eb40c7b476a2722d8 (patch) | |
| tree | ca3c56088b266b415406f57e787a004d7c139a40 /src/usr/diag | |
| parent | 4ac944be420f4d5e1635aae520674bb6c5e4582f (diff) | |
| download | blackbird-hostboot-4c2df3a7280e94e7bee4b00eb40c7b476a2722d8.tar.gz blackbird-hostboot-4c2df3a7280e94e7bee4b00eb40c7b476a2722d8.zip | |
PRD: Updates to AUE/IAUE handling
Change-Id: I7d403cb29bbeb2d5f383a38816b579e71fc3dc0d
RTC: 173491
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39851
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39967
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
| -rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C | 5 | ||||
| -rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H | 4 | ||||
| -rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C | 63 | ||||
| -rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 8 | ||||
| -rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 33 | ||||
| -rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 13 |
6 files changed, 110 insertions, 16 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C index e0bcd5bb6..29b9815d8 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.C @@ -233,8 +233,6 @@ uint32_t getMemReadAddr<TYPE_MBA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ -#ifdef __HOSTBOOT_MODULE - template<> uint32_t getMemMaintAddr<TYPE_MCBIST>( ExtensibleChip * i_chip, MemAddr & o_addr ) @@ -286,6 +284,7 @@ uint32_t getMemMaintAddr<TYPE_MCA>( ExtensibleChip * i_chip, MemAddr & o_addr ) } //------------------------------------------------------------------------------ + template<> uint32_t getMemMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr & o_addr ) { @@ -319,6 +318,8 @@ uint32_t getMemMaintAddr<TYPE_MBA>( ExtensibleChip * i_chip, MemAddr & o_addr ) //------------------------------------------------------------------------------ +#ifdef __HOSTBOOT_MODULE + uint32_t getMcbistMaintPort( ExtensibleChip * i_mcbChip, std::vector<ExtensibleChip *> & o_mcaList ) { diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H index fcbe33af9..07808a990 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemAddress.H @@ -172,8 +172,6 @@ template<TARGETING::TYPE T> uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg, MemAddr & o_addr ); -#ifdef __HOSTBOOT_MODULE - /** * @brief Reads the maintenance address from hardware. * @@ -194,6 +192,8 @@ uint32_t getMemReadAddr( ExtensibleChip * i_chip, MemAddr::ReadReg i_reg, template<TARGETING::TYPE T> uint32_t getMemMaintAddr( ExtensibleChip * i_chip, MemAddr & o_addr ); +#ifdef __HOSTBOOT_MODULE + /** * @brief Queries broadcast mode information and determines which of the MCBIST * ports were targeted for the command. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C index cac225d23..22dd60554 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C @@ -272,6 +272,69 @@ PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeImpe ); //------------------------------------------------------------------------------ +/** + * @brief MCAECCFIR[13,16] - Mainline AUE and IAUE + * @param i_chip MCA chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeFetchAueIaue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[p9_mca::AnalyzeFetchAueIaue] " + + MemAddr addr; + if ( SUCCESS != getMemReadAddr<TYPE_MCA>(i_chip, MemAddr::READ_AUE_ADDR, + addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x,READ_AUE_ADDR) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeFetchAueIaue ); + +/** + * @brief MCAECCFIR[33] - Maintenance AUE + * @param i_chip MCA chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +int32_t AnalyzeMaintAue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[p9_mca::AnalyzeMaintAue] " + + MemAddr addr; + if ( SUCCESS != getMemMaintAddr<TYPE_MCA>(i_chip, addr) ) + { + PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed", + i_chip->getHuid() ); + } + else + { + MemRank rank = addr.getRank(); + MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm, MRU_HIGH ); + } + + return SUCCESS; // nothing to return to rule code + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( p9_mca, AnalyzeMaintAue ); + +//------------------------------------------------------------------------------ + } // end namespace p9_mca } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index cda13518f..2f62b13ac 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -407,7 +407,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 ) /** MCAECCFIR[13] * Mainline read AUE */ - (rMCAECCFIR, bit(13)) ? all_dimm_H_th_1; + (rMCAECCFIR, bit(13)) ? mainline_aue_iaue_handling; /** MCAECCFIR[14] * Mainline read UE @@ -422,7 +422,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 ) /** MCAECCFIR[16] * Mainline read IAUE */ - (rMCAECCFIR, bit(16)) ? all_dimm_H_th_1; + (rMCAECCFIR, bit(16)) ? mainline_aue_iaue_handling; /** MCAECCFIR[17] * Mainline read IUE @@ -472,7 +472,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 ) /** MCAECCFIR[33] * Maintenance AUE */ - (rMCAECCFIR, bit(33)) ? all_dimm_H_th_1; + (rMCAECCFIR, bit(33)) ? maintenance_aue_handling; /** MCAECCFIR[34] * Maintenance UE @@ -487,7 +487,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14, 17, 37 ) /** MCAECCFIR[36] * Maintenance IAUE */ - (rMCAECCFIR, bit(36)) ? all_dimm_H_th_1; + (rMCAECCFIR, bit(36)) ? maintenance_iaue_handling; /** MCAECCFIR[37] * Maintenance IUE diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index 95d591c49..a18372c0f 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -23,14 +23,6 @@ # # IBM_PROLOG_END_TAG -/** Callout all connected DIMMs HIGH on first occurence. */ -actionclass all_dimm_H_th_1 -{ - callout(connected(TYPE_DIMM,0), MRU_HIGH); - callout(connected(TYPE_DIMM,1), MRU_HIGH); - threshold1; -}; - /** Verify Chip Mark */ actionclass verify_chip_mark_0 { funccall("AnalyzeFetchMpe_0"); }; actionclass verify_chip_mark_1 { funccall("AnalyzeFetchMpe_1"); }; @@ -95,6 +87,31 @@ actionclass maintenance_iue_handling actionclass impe_handling { funccall("AnalyzeImpe"); }; +/** Handle Mainline AUEs/IAUEs */ +actionclass mainline_aue_iaue_handling +{ + funccall("AnalyzeFetchAueIaue"); + calloutSelfLow; + threshold1; +}; + +/** Handle Maintenance AUEs */ +actionclass maintenance_aue_handling +{ + funccall("AnalyzeMaintAue"); + calloutSelfLow; + threshold1; +}; + +/** Handle Maintenance IAUEs */ +actionclass maintenance_iaue_handling +{ + callout(connected(TYPE_DIMM,0), MRU_HIGH); + callout(connected(TYPE_DIMM,1), MRU_HIGH); + calloutSelfLow; + threshold1; +}; + /** MCA/UE algroithm, threshold 5 per day */ actionclass mca_ue_algorithm_th_5perDay { diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index 59600c116..6e0c3878b 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -314,6 +314,19 @@ uint32_t startBgScrub<TYPE_MCA>( ExtensibleChip * i_mcaChip, // background scrubbing never stops. mss::mcbist::stop_conditions stopCond; + // AUEs are checkstop attentions. Unfortunately, MCBIST commands do not stop + // when the system checkstops. Therefore, we must set the stop condition for + // AUEs so that we can use the MCBMCAT register to determine where the error + // occurred. Note that there isn't a stop condition specifically for IAUEs. + // Instead, there is the RCE threshold. Unfortunately, the RCE counter is a + // combination of IUE, IAUE, IMPE, and IRCD errors. It is possible to use + // this threshold and simply restart background scrubbing each time there is + // an IUE, IMPE, or IRCD but there is concern that PRD might get stuck + // handling those attentions on every address even after thresholds have + // been reached. Therefore, we simplified the design and will simply call + // out both DIMMs for maintenance IAUEs. + stopCond.set_pause_on_aue(mss::ON); + #ifdef CONFIG_HBRT_PRD stopCond.set_thresh_nce_int(1) |

