diff options
author | sachin gupta <sgupta2m@in.ibm.com> | 2013-05-21 07:40:21 -0500 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2013-06-11 09:18:53 -0500 |
commit | 5e51cb6bdfef5f1e6b4a38c015309fed3db9aa09 (patch) | |
tree | 5f428831e0423bb57720031691873420a6421ca7 /src/usr/diag/prdf/common/plat | |
parent | c41304e281e34a3b5e4a865e2db0f173331fb259 (diff) | |
download | talos-hostboot-5e51cb6bdfef5f1e6b4a38c015309fed3db9aa09.tar.gz talos-hostboot-5e51cb6bdfef5f1e6b4a38c015309fed3db9aa09.zip |
PRD: Handling dependency between DMI Bus FIR and MBI/MCIFIR
Change-Id: I35894f00d38db9afcaef6574fd7492322d2a8398
Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/4637
Reviewed-by: Christopher T. Phan <cphan@us.ibm.com>
Tested-by: Jenkins Server
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Reviewed-by: Zane Shelley <zshelle@us.ibm.com>
Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/4938
Diffstat (limited to 'src/usr/diag/prdf/common/plat')
9 files changed, 330 insertions, 144 deletions
diff --git a/src/usr/diag/prdf/common/plat/pegasus/CommonActions.rule b/src/usr/diag/prdf/common/plat/pegasus/CommonActions.rule index 6a30df0e2..903af5263 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/CommonActions.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/CommonActions.rule @@ -64,6 +64,12 @@ actionclass threshold1 threshold( field(1) ); }; +/** Threshold of 2 per day */ +actionclass threshold2pday +{ + threshold( field(2 / day) ); +}; + /** Threshold of 32 per day */ actionclass threshold32pday { @@ -105,6 +111,7 @@ actionclass SUEGenerationPoint # Callout self actionclass calloutSelfHigh { callout(MRU_HIGH); }; actionclass calloutSelfMed { callout(MRU_MED); }; +actionclass calloutSelfHigh { callout(MRU_HIGH); }; actionclass calloutSelfMedA { callout(MRU_MEDA); }; actionclass calloutSelfLow { callout(MRU_LOW); }; diff --git a/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule b/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule index 8a65235b0..3c9ee41dd 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Mcs.rule @@ -215,8 +215,7 @@ group gMciFir attntype CHECK_STOP, RECOVERABLE, SPECIAL, UNIT_CS /** MCIFIR[10] * MCIFIRQ_CRC_PERFORMANCE_DEGRADATION */ - # FIXME RTC 23127 To be updated in a separate activity along with MBIFIR[10] - (MciFir, bit(10)) ? defaultMaskedError; + (MciFir, bit(10)) ? analyzeSpareBitAndThr; /** MCIFIR[11] * MCIFIRQ_CHANNEL_INTERLOCK_FAIL @@ -451,12 +450,18 @@ actionclass calloutConnCenTh1 threshold1; }; -/** Callout the DMI bus (MEDA), threshold 1 */ -actionclass calloutDmiBusTh1 +/** Callout the DMI bus (MEDA) */ +actionclass calloutDmiBus { calloutSelfMedA; callout(connected(TYPE_MEMBUF), MRU_MEDA); callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); +}; + +/** Callout the DMI bus (MEDA), threshold 1 */ +actionclass calloutDmiBusTh1 +{ + calloutDmiBus; threshold1; }; @@ -466,3 +471,14 @@ actionclass calloutDmiBusAndLvl2Th1 calloutDmiBusTh1; callout2ndLvlLow; }; + +actionclass CalloutDmiBusAndThr2pd +{ + calloutDmiBus; + threshold2pday; +}; + +actionclass analyzeSpareBitAndThr +{ + try( funccall("checkSpareBit"), CalloutDmiBusAndThr2pd ); +}; diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule index 6c0751204..4d5bc5d58 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule @@ -162,6 +162,7 @@ group gNestLFir filter singlebit ################################################################################ # NEST Chiplet DMIFIR ################################################################################ +# RAS spreadsheet: p8dd1_mss_FFDC_37_ reviewd.xls rule DmiFir { @@ -175,22 +176,22 @@ group gDmiFir filter singlebit /** DMIFIR[0] * FIR_RX_INVALID_STATE_OR_PARITY_ERROR */ - (DmiFir, bit(0)) ? TBDDefaultCallout; + (DmiFir, bit(0)) ? SelfHighThr1; /** DMIFIR[1] * FIR_TX_INVALID_STATE_OR_PARITY_ERROR */ - (DmiFir, bit(1)) ? TBDDefaultCallout; + (DmiFir, bit(1)) ? SelfHighThr1; /** DMIFIR[2] * FIR_GCR_HANG_ERROR */ - (DmiFir, bit(2)) ? TBDDefaultCallout; + (DmiFir, bit(2)) ? SelfHighThr1; /** DMIFIR[8] * FIR_RX_BUS0_TRAINING_ERROR */ - (DmiFir, bit(8)) ? TBDDefaultCallout; + (DmiFir, bit(8)) ? defaultMaskedError; /** DMIFIR[9] * FIR_RX_BUS0_SPARE_DEPLOYED @@ -205,122 +206,122 @@ group gDmiFir filter singlebit /** DMIFIR[11] * FIR_RX_BUS0_RECAL_OR_DYN_REPAIR_ERROR */ - (DmiFir, bit(11)) ? TBDDefaultCallout; + (DmiFir, bit(11)) ? calloutDmiBusTh1; /** DMIFIR[12] * FIR_RX_BUS0_TOO_MANY_BUS_ERRORS */ - (DmiFir, bit(12)) ? bus0TooManyBusErrors; + (DmiFir, bit(12)) ? defaultMaskedError; /** DMIFIR[16] * FIR_RX_BUS1_TRAINING_ERROR */ - (DmiFir, bit(16)) ? TBDDefaultCallout; + (DmiFir, bit(16)) ? defaultMaskedError; /** DMIFIR[17] * FIR_RX_BUS1_SPARE_DEPLOYED */ - (DmiFir, bit(17)) ? TBDDefaultCallout; + (DmiFir, bit(17)) ? defaultMaskedError; /** DMIFIR[18] * FIR_RX_BUS1_MAX_SPARES_EXCEEDED */ - (DmiFir, bit(18)) ? TBDDefaultCallout; + (DmiFir, bit(18)) ? defaultMaskedError; /** DMIFIR[19] * FIR_RX_BUS1_RECAL_OR_DYN_REPAIR_ERROR */ - (DmiFir, bit(19)) ? TBDDefaultCallout; + (DmiFir, bit(19)) ? defaultMaskedError; /** DMIFIR[20] * FIR_RX_BUS1_TOO_MANY_BUS_ERRORS */ - (DmiFir, bit(20)) ? TBDDefaultCallout; + (DmiFir, bit(20)) ? defaultMaskedError; /** DMIFIR[24] * FIR_RX_BUS2_TRAINING_ERROR */ - (DmiFir, bit(24)) ? TBDDefaultCallout; + (DmiFir, bit(24)) ? defaultMaskedError; /** DMIFIR[25] * FIR_RX_BUS2_SPARE_DEPLOYED */ - (DmiFir, bit(25)) ? TBDDefaultCallout; + (DmiFir, bit(25)) ? defaultMaskedError; /** DMIFIR[26] * FIR_RX_BUS2_MAX_SPARES_EXCEEDED */ - (DmiFir, bit(26)) ? TBDDefaultCallout; + (DmiFir, bit(26)) ? defaultMaskedError; /** DMIFIR[27] * FIR_RX_BUS2_RECAL_OR_DYN_REPAIR_ERROR */ - (DmiFir, bit(27)) ? TBDDefaultCallout; + (DmiFir, bit(27)) ? defaultMaskedError; /** DMIFIR[28] * FIR_RX_BUS2_TOO_MANY_BUS_ERRORS */ - (DmiFir, bit(28)) ? TBDDefaultCallout; + (DmiFir, bit(28)) ? defaultMaskedError; /** DMIFIR[32] * FIR_RX_BUS3_TRAINING_ERROR */ - (DmiFir, bit(32)) ? TBDDefaultCallout; + (DmiFir, bit(32)) ? defaultMaskedError; /** DMIFIR[33] * FIR_RX_BUS3_SPARE_DEPLOYED */ - (DmiFir, bit(33)) ? TBDDefaultCallout; + (DmiFir, bit(33)) ? defaultMaskedError; /** DMIFIR[34] * FIR_RX_BUS3_MAX_SPARES_EXCEEDED */ - (DmiFir, bit(34)) ? TBDDefaultCallout; + (DmiFir, bit(34)) ? defaultMaskedError; /** DMIFIR[35] * FIR_RX_BUS3_RECAL_OR_DYN_REPAIR_ERROR */ - (DmiFir, bit(35)) ? TBDDefaultCallout; + (DmiFir, bit(35)) ? defaultMaskedError; /** DMIFIR[36] * FIR_RX_BUS3_TOO_MANY_BUS_ERRORS */ - (DmiFir, bit(36)) ? TBDDefaultCallout; + (DmiFir, bit(36)) ? defaultMaskedError; /** DMIFIR[40] * FIR_RX_BUS4_TRAINING_ERROR */ - (DmiFir, bit(40)) ? TBDDefaultCallout; + (DmiFir, bit(40)) ? defaultMaskedError; /** DMIFIR[41] * FIR_RX_BUS4_SPARE_DEPLOYED */ - (DmiFir, bit(41)) ? TBDDefaultCallout; + (DmiFir, bit(41)) ? defaultMaskedError; /** DMIFIR[42] * FIR_RX_BUS4_MAX_SPARES_EXCEEDED */ - (DmiFir, bit(42)) ? TBDDefaultCallout; + (DmiFir, bit(42)) ? defaultMaskedError; /** DMIFIR[43] * FIR_RX_BUS4_RECAL_OR_DYN_REPAIR_ERROR */ - (DmiFir, bit(43)) ? TBDDefaultCallout; + (DmiFir, bit(43)) ? defaultMaskedError; /** DMIFIR[44] * FIR_RX_BUS4_TOO_MANY_BUS_ERRORS */ - (DmiFir, bit(44)) ? TBDDefaultCallout; + (DmiFir, bit(44)) ? defaultMaskedError; /** DMIFIR[48] * FIR_SCOMFIR_ERROR */ - (DmiFir, bit(48)) ? TBDDefaultCallout; + (DmiFir, bit(48)) ? defaultMaskedError; /** DMIFIR[49] * FIR_SCOMFIR_ERROR_CLONE */ - (DmiFir, bit(49)) ? TBDDefaultCallout; + (DmiFir, bit(49)) ? defaultMaskedError; }; ################################################################################ @@ -544,8 +545,7 @@ group gMbiFir filter singlebit /** MBIFIR[10] * MBIFIRQ_CRC_PERFORMANCE_DEGRADATION */ - # TODO RTC 23125. In RAS spreadsheet, threshold value is not clear - (MbiFir, bit(10)) ? calloutDmiBus; + (MbiFir, bit(10)) ? analyzeSpareBitAndThr; /** MBIFIR[11] * MBIFIRQ_HOST_MC_GLOBAL_CHECKSTOP @@ -1201,13 +1201,6 @@ actionclass bus0MaxSparesExceeded threshold1; }; -actionclass bus0TooManyBusErrors -{ - callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); - funccall("dmiBus0TooManyErrors"); - threshold1; -}; - /** Callout the DMI bus */ actionclass calloutDmiBus { @@ -1223,6 +1216,12 @@ actionclass calloutDmiBusTh1 threshold1; }; +actionclass calloutDmiBusTh2pday +{ + calloutDmiBus; + threshold2pday; +}; + /** Callout the DMI bus and 2nd Level (LOW), threhold 1 */ actionclass calloutDmiBusLvl2Th1 { @@ -1235,3 +1234,9 @@ actionclass CalloutMcsHighAndThr1 callout(connected(TYPE_MCS),MRU_HIGH); threshold1; }; + +actionclass analyzeSpareBitAndThr +{ + try( funccall("checkSpareBit"), calloutDmiBusTh2pday ); +}; + diff --git a/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule b/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule index 0eda12fcb..6a90574f1 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Proc_acts_PB.rule @@ -2787,6 +2787,7 @@ group gPciNestFir filter singlebit ################################################################################ # PB Chiplet IOMCFIR_1 ################################################################################ +# RAS spreadsheet: p8dd1_mss_FFDC_37_ reviewd.xls rule IomcFir_1 { @@ -2799,17 +2800,17 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[0] * FIR_RX_INVALID_STATE_OR_PARITY_ERROR */ - (IomcFir_1, bit(0)) ? TBDDefaultCallout; + (IomcFir_1, bit(0)) ? SelfHighThr1; /** IOMCFIR_1[1] * FIR_TX_INVALID_STATE_OR_PARITY_ERROR */ - (IomcFir_1, bit(1)) ? TBDDefaultCallout; + (IomcFir_1, bit(1)) ? SelfHighThr1; /** IOMCFIR_1[2] * FIR_GCR_HANG_ERROR */ - (IomcFir_1, bit(2)) ? TBDDefaultCallout; + (IomcFir_1, bit(2)) ? SelfHighThr1; /** IOMCFIR_1[3|4|5|6|7] * Reserved @@ -2819,7 +2820,7 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[8] * FIR_RX_BUS0_TRAINING_ERROR */ - (IomcFir_1, bit(8)) ? TBDDefaultCallout; + (IomcFir_1, bit(8)) ? defaultMaskedError; /** IOMCFIR_1[9] * FIR_RX_BUS0_SPARE_DEPLOYED @@ -2834,12 +2835,12 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[11] * FIR_RX_BUS0_RECAL_OR_DYN_REPAIR_ERROR */ - (IomcFir_1, bit(11)) ? TBDDefaultCallout; + (IomcFir_1, bit(11)) ? calloutDmiBus4Thr1; /** IOMCFIR_1[12] * FIR_RX_BUS0_TOO_MANY_BUS_ERRORS */ - (IomcFir_1, bit(12)) ? dmiBus0TooManyBusErrors; + (IomcFir_1, bit(12)) ? defaultMaskedError; /** IOMCFIR_1[13|14|15] * Reserved @@ -2849,7 +2850,7 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[16] * FIR_RX_BUS1_TRAINING_ERROR */ - (IomcFir_1, bit(16)) ? TBDDefaultCallout; + (IomcFir_1, bit(16)) ? defaultMaskedError; /** IOMCFIR_1[17] * FIR_RX_BUS1_SPARE_DEPLOYED @@ -2864,12 +2865,12 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[19] * FIR_RX_BUS1_RECAL_OR_DYN_REPAIR_ERROR */ - (IomcFir_1, bit(19)) ? TBDDefaultCallout; + (IomcFir_1, bit(19)) ? calloutDmiBus5Thr1; /** IOMCFIR_1[20] * FIR_RX_BUS1_TOO_MANY_BUS_ERRORS */ - (IomcFir_1, bit(20)) ? dmiBus1TooManyBusErrors; + (IomcFir_1, bit(20)) ? defaultMaskedError; /** IOMCFIR_1[21|22|23] * Reserved @@ -2879,7 +2880,7 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[24] * FIR_RX_BUS2_TRAINING_ERROR */ - (IomcFir_1, bit(24)) ? TBDDefaultCallout; + (IomcFir_1, bit(24)) ? defaultMaskedError; /** IOMCFIR_1[25] * FIR_RX_BUS2_SPARE_DEPLOYED @@ -2894,12 +2895,12 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[27] * FIR_RX_BUS2_RECAL_OR_DYN_REPAIR_ERROR */ - (IomcFir_1, bit(27)) ? TBDDefaultCallout; + (IomcFir_1, bit(27)) ? calloutDmiBus6Thr1; /** IOMCFIR_1[28] * FIR_RX_BUS2_TOO_MANY_BUS_ERRORS */ - (IomcFir_1, bit(28)) ? dmiBus2TooManyBusErrors; + (IomcFir_1, bit(28)) ? defaultMaskedError; /** IOMCFIR_1[29|30|31] * Reserved @@ -2909,7 +2910,7 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[32] * FIR_RX_BUS3_TRAINING_ERROR */ - (IomcFir_1, bit(32)) ? TBDDefaultCallout; + (IomcFir_1, bit(32)) ? defaultMaskedError; /** IOMCFIR_1[33] * FIR_RX_BUS3_SPARE_DEPLOYED @@ -2924,12 +2925,12 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[35] * FIR_RX_BUS3_RECAL_OR_DYN_REPAIR_ERROR */ - (IomcFir_1, bit(35)) ? TBDDefaultCallout; + (IomcFir_1, bit(35)) ? calloutDmiBus7Thr1; /** IOMCFIR_1[36] * FIR_RX_BUS3_TOO_MANY_BUS_ERRORS */ - (IomcFir_1, bit(36)) ? dmiBus3TooManyBusErrors; + (IomcFir_1, bit(36)) ? defaultMaskedError; /** IOMCFIR_1[37|38|39] * Reserved @@ -2939,27 +2940,27 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[40] * FIR_RX_BUS4_TRAINING_ERROR */ - (IomcFir_1, bit(40)) ? TBDDefaultCallout; + (IomcFir_1, bit(40)) ? defaultMaskedError; /** IOMCFIR_1[41] * FIR_RX_BUS4_SPARE_DEPLOYED */ - (IomcFir_1, bit(41)) ? TBDDefaultCallout; + (IomcFir_1, bit(41)) ? defaultMaskedError; /** IOMCFIR_1[42] * FIR_RX_BUS4_MAX_SPARES_EXCEEDED */ - (IomcFir_1, bit(42)) ? TBDDefaultCallout; + (IomcFir_1, bit(42)) ? defaultMaskedError; /** IOMCFIR_1[43] * FIR_RX_BUS4_RECAL_OR_DYN_REPAIR_ERROR */ - (IomcFir_1, bit(43)) ? TBDDefaultCallout; + (IomcFir_1, bit(43)) ? defaultMaskedError; /** IOMCFIR_1[44] * FIR_RX_BUS4_TOO_MANY_BUS_ERRORS */ - (IomcFir_1, bit(44)) ? TBDDefaultCallout; + (IomcFir_1, bit(44)) ? defaultMaskedError; /** IOMCFIR_1[45|46|47] * Reserved @@ -2968,12 +2969,12 @@ group gIomcFir_1 filter singlebit /** IOMCFIR_1[48] * FIR_SCOMFIR_ERROR */ - (IomcFir_1, bit(48)) ? TBDDefaultCallout; + (IomcFir_1, bit(48)) ? defaultMaskedError; /** IOMCFIR_1[49] * FIR_SCOMFIR_ERROR_CLONE */ - (IomcFir_1, bit(49)) ? TBDDefaultCallout; + (IomcFir_1, bit(49)) ? defaultMaskedError; }; ################################################################################ @@ -3044,35 +3045,6 @@ actionclass dmiBus3MaxSparesExceeded threshold1; }; -# DMI Bus 0-3 Too Many bus errors -actionclass dmiBus0TooManyBusErrors -{ - callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); - funccall("dmiBus0TooManyErrors"); - threshold1; -}; - -actionclass dmiBus1TooManyBusErrors -{ - callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); - funccall("dmiBus1TooManyErrors"); - threshold1; -}; - -actionclass dmiBus2TooManyBusErrors -{ - callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); - funccall("dmiBus2TooManyErrors"); - threshold1; -}; - -actionclass dmiBus3TooManyBusErrors -{ - callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); - funccall("dmiBus3TooManyErrors"); - threshold1; -}; - /** Callout the connected PCI 0 controller. */ actionclass calloutConnPci0 { callout(connected(TYPE_PCI,0), MRU_MED); }; @@ -3121,3 +3093,42 @@ actionclass calloutConnPci2AndIsolatePathTh1 calloutConnPci2Th1; }; +/** Callout MEMBUS_ERROR_ENUM procedure , threshold 1 */ +actionclass calloutMemErrProceThr1 +{ + callout(procedure(MEMBUS_ERROR_ENUM), MRU_LOW); + threshold1; +}; + +/** Callout the DMI bus4 (MEDA), threshold 1 */ +actionclass calloutDmiBus4Thr1 +{ + callout(connected(TYPE_MCS, 4), MRU_MEDA); + callout(connected(TYPE_MEMBUF, 4), MRU_MEDA); + calloutMemErrProceThr1; +}; + +/** Callout the DMI bus5 (MEDA), threshold 1 */ +actionclass calloutDmiBus5Thr1 +{ + callout(connected(TYPE_MCS, 5), MRU_MEDA); + callout(connected(TYPE_MEMBUF, 5), MRU_MEDA); + calloutMemErrProceThr1; +}; + +/** Callout the DMI bus6 (MEDA), threshold 1 */ +actionclass calloutDmiBus6Thr1 +{ + callout(connected(TYPE_MCS, 6), MRU_MEDA); + callout(connected(TYPE_MEMBUF, 6), MRU_MEDA); + calloutMemErrProceThr1; +}; + +/** Callout the DMI bus7 (MEDA), threshold 1 */ +actionclass calloutDmiBus7Thr1 +{ + callout(connected(TYPE_MCS, 7), MRU_MEDA); + callout(connected(TYPE_MEMBUF, 7), MRU_MEDA); + calloutMemErrProceThr1; +}; + diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C index b35326c15..9879ed35e 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C @@ -364,26 +364,27 @@ int32_t dmiBus1SparesExceeded( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE( Membuf, dmiBus1SparesExceeded ); /** - * @brief Handle DMI Bus 0-1 Too Many Bus Errors - * @param i_chip Mem Buf chip - * @param i_sc The step code data struct - * @returns Failure or Success + * @brief Checks if spare deployed bit for DMI bus is set. + * @param i_mbChip Membuf chip + * @param i_sc The step code data struct. + * @return SUCCESS if bit is on, FAIL otherwise. */ -int32_t dmiBus0TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) +int32_t checkSpareBit( ExtensibleChip * i_mbChip, + STEP_CODE_DATA_STRUCT & i_sc ) { - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MEMBUF, 0, i_sc, - false); -} -PRDF_PLUGIN_DEFINE( Membuf, dmiBus0TooManyErrors ); + using namespace LaneRepair; + int32_t l_rc = FAIL; -int32_t dmiBus1TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MEMBUF, 1, i_sc, - false); + ExtensibleChip * mcsChip = getMembufDataBundle( i_mbChip )->getMcsChip(); + + if ( true == isSpareBitOnDMIBus( mcsChip, i_mbChip )) + { + l_rc = SUCCESS; + } + + return l_rc; } -PRDF_PLUGIN_DEFINE( Membuf, dmiBus1TooManyErrors ); +PRDF_PLUGIN_DEFINE( Membuf, checkSpareBit ); } // end namespace Membuf } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.C b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.C index a76413ec6..d7c95ca5a 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.C @@ -30,6 +30,8 @@ #include <iipServiceDataCollector.h> #include <prdfExtensibleChip.H> #include <UtilHash.H> +#include <prdfCenMembufDataBundle.H> +#include <prdfP8McsDataBundle.H> using namespace TARGETING; @@ -246,8 +248,136 @@ int32_t handleLaneRepairEvent (ExtensibleChip * i_chip, l_rc |= clearIOFirs(rxBusTgt); } + if ( i_spareDeployed ) + { + l_rc |= cleanupSecondaryFirBits( i_chip, i_busType, i_busPos ); + } + return l_rc; +} + +//----------------------------------------------------------------------------- + +bool isSpareBitOnDMIBus( ExtensibleChip * i_mcsChip, ExtensibleChip * i_mbChip ) +{ + bool bitOn = false; + + do + { + // If any of these object is NULL, spare bit should not be on. + if ( ( NULL == i_mcsChip ) || ( NULL == i_mbChip )) + break; + + // check spare deployed bit on Centaur side + SCAN_COMM_REGISTER_CLASS * dmiFir = i_mbChip->getRegister( "DMIFIR" ); + int32_t rc = dmiFir->Read(); + if ( SUCCESS != rc ) + { + PRDF_ERR("isSpareBitOnDMIBus() : Failed to read DMIFIR." + "MEMBUF: 0x%08X", getHuid( i_mbChip->GetChipHandle()) ); + break; + } + if ( dmiFir->IsBitSet( 9 )) + { + bitOn = true; + break; + } + + // check spare deployed bit on Proc side + TargetHandle_t mcsTgt = i_mcsChip->GetChipHandle(); + TargetHandle_t procTgt = getConnectedParent( mcsTgt, TYPE_PROC ); + ExtensibleChip * procChip = + ( ExtensibleChip * )systemPtr->GetChip( procTgt ); + + uint32_t mcsPos = getTargetPosition( mcsTgt ); + + const char * regStr = ( 4 > mcsPos) ? "IOMCFIR_0" : "IOMCFIR_1"; + SCAN_COMM_REGISTER_CLASS * iomcFir = procChip->getRegister( regStr ); + rc = iomcFir->Read(); + if ( SUCCESS != rc ) + { + PRDF_ERR("isSpareBitOnDMIBus() : Failed to read %s." + "MCS: 0x%08X", regStr, getHuid(mcsTgt) ); + break; + } + // Bit 9, 17, 25 and 33 are for spare deployed. + // Check bit corrosponding to MCS position + uint8_t bitPos = 9 + ( mcsPos % 4 ) *8; + if ( iomcFir->IsBitSet(bitPos)) + { + bitOn = true; + } + + }while(0); + + return bitOn; +} + +//----------------------------------------------------------------------------- + +int32_t cleanupSecondaryFirBits( ExtensibleChip * i_chip, + TYPE i_busType, + uint32_t i_busPos ) +{ + int32_t l_rc = SUCCESS; + TargetHandle_t mcsTgt = NULL; + TargetHandle_t mbTgt = NULL; + ExtensibleChip * mcsChip = NULL; + ExtensibleChip * mbChip = NULL; + + //In case of spare deployed attention for DMI bus, we need to clear + // secondary MBIFIR[10] and MCIFIR[10] bits. + if ( i_busType == TYPE_MCS ) + { + mcsTgt = getConnectedChild( i_chip->GetChipHandle(), + TYPE_MCS, + i_busPos); + mcsChip = ( ExtensibleChip * )systemPtr->GetChip( mcsTgt ); + mbChip = getMcsDataBundle( mcsChip )->getMembChip(); + mbTgt = mbChip->GetChipHandle(); + + } + else if ( i_busType == TYPE_MEMBUF ) + { + mbTgt = i_chip->GetChipHandle(); + mcsChip = getMembufDataBundle( i_chip )->getMcsChip(); + mcsTgt = mcsChip->GetChipHandle(); + mbChip = i_chip; + } + + if ( ( NULL != mcsChip ) && ( NULL != mbChip )) + { + SCAN_COMM_REGISTER_CLASS * mciFir = + mcsChip->getRegister( "MCIFIR" ); + int32_t rc = mciFir->Read(); + if ( SUCCESS != rc ) + { + PRDF_ERR("cleanupSecondaryFirBits() : Failed to read MCIFIR." + "MCS: 0x%08X", getHuid(mcsTgt) ); + l_rc |= rc; + } + else if ( mciFir->IsBitSet(10)) + { + mciFir->ClearBit(10); + l_rc |= mciFir->Write(); + } + + SCAN_COMM_REGISTER_CLASS * mbiFir = + mbChip->getRegister( "MBIFIR" ); + rc = mbiFir->Read(); + if ( SUCCESS != rc ) + { + PRDF_ERR("cleanupSecondaryFirBits() : Failed to read MBIFIR." + "MEMBUF: 0x%08X", getHuid(mbTgt) ); + l_rc |= rc; + } + else if ( mbiFir->IsBitSet(10)) + { + mbiFir->ClearBit(10); + l_rc |= mbiFir->Write(); + } + } return l_rc; } -} // end namespace MemUtil +} // end namespace LaneRepair } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H index d3fd7d99e..1bf1d256c 100644 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfLaneRepair.H @@ -54,6 +54,33 @@ namespace LaneRepair STEP_CODE_DATA_STRUCT & i_sc, bool i_spareDeployed); + // Utility functions for secondary FIR bits + + /** + * @brief clean up secondary FIR bits ( MBI/MCIFIR bit 10 ) + * @param i_chip Chip that detected the lane repair event + * @param i_busType Bus connection type (X,A, MEMBUF, or MCS) + * @param i_busPos Bus position + * @note This will only clear FIR bits if spare deploy attention is present + * on DMI bus. It does not check for spare deployed attention type + * Calling fucntion should make ensure that. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + int32_t cleanupSecondaryFirBits( ExtensibleChip * i_chip, + TARGETING::TYPE i_busType, + uint32_t i_busPos ); + + /** + * @brief Checks if spare bit is set for DMI Bus. + * @param i_mcsChip MCS chip. + * @param i_mbChip Membuf chip. + * @return True if spare bit is set false otherwise. + * @note This function will check both ends of DMI bus to check + * if spare deployed attention is raised on any side. + */ + bool isSpareBitOnDMIBus( ExtensibleChip * i_mcsChip, + ExtensibleChip * i_mbChip ); + } // end namespace LaneRepair } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C index b74e8b44e..5a3747b94 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C @@ -33,6 +33,7 @@ #include <iipSystem.h> #include <prdfP8McsDataBundle.H> #include <prdfCenMembufDataBundle.H> +#include <prdfLaneRepair.H> //############################################################################## // @@ -186,6 +187,29 @@ int32_t PostAnalysis( ExtensibleChip * i_mcsChip, } PRDF_PLUGIN_DEFINE( Mcs, PostAnalysis ); +/** + * @brief Checks if spare deployed bit for DMI bus for this MCS is set. + * @param i_mcsChip MCS chip + * @param i_sc The step code data struct. + * @return SUCCESS if bit is on, FAIL otherwise. + */ +int32_t checkSpareBit( ExtensibleChip * i_mcsChip, + STEP_CODE_DATA_STRUCT & i_sc ) +{ + using namespace LaneRepair; + int32_t l_rc = FAIL; + + ExtensibleChip * mbChip = getMcsDataBundle( i_mcsChip )->getMembChip(); + + if ( true == isSpareBitOnDMIBus( i_mcsChip, mbChip )) + { + l_rc = SUCCESS; + } + + return l_rc; +} +PRDF_PLUGIN_DEFINE( Mcs, checkSpareBit ); + } // end namespace Mcs } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C index 0d40c9ddd..0677f7f9f 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C @@ -599,41 +599,6 @@ int32_t dmiBus3SparesExceeded( ExtensibleChip * i_chip, } PRDF_PLUGIN_DEFINE( Proc, dmiBus3SparesExceeded ); - -/** - * @brief Handle DMI Bus 0-3 too many bus errors - * @param i_chip P8 chip - * @param i_sc The step code data struct - * @returns Failure or Success - */ -int32_t dmiBus0TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MCS, 4, i_sc, false); -} -PRDF_PLUGIN_DEFINE( Proc, dmiBus0TooManyErrors ); - -int32_t dmiBus1TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MCS, 5, i_sc, false); -} -PRDF_PLUGIN_DEFINE( Proc, dmiBus1TooManyErrors ); - -int32_t dmiBus2TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MCS, 6, i_sc, false); -} -PRDF_PLUGIN_DEFINE( Proc, dmiBus2TooManyErrors ); - -int32_t dmiBus3TooManyErrors( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_sc ) -{ - return LaneRepair::handleLaneRepairEvent(i_chip, TYPE_MCS, 7, i_sc, false); -} -PRDF_PLUGIN_DEFINE( Proc, dmiBus3TooManyErrors ); - /** * @brief Mask attentions from MCIFIR after Centaur Unit checkstop * @param i_chip P8 chip |