diff options
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_capp.rule | 20 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule | 19 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_ec.rule | 6 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_ex.rule | 2 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 8 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 15 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_nimbus.rule | 1275 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_nimbus_actions.rule | 10 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_phb.rule | 6 |
9 files changed, 692 insertions, 669 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_capp.rule b/src/usr/diag/prdf/common/plat/p9/p9_capp.rule index 5ad05ecff..dcc2b371d 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_capp.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_capp.rule @@ -194,7 +194,7 @@ group gCXAFIR filter singlebit, cs_root_cause /** CXAFIR[4] * CXA Timer expired recoverable error */ - (rCXAFIR, bit(4)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(4)) ? level2_th_1; /** CXAFIR[5] * Recovery sequencer hang detection @@ -204,12 +204,12 @@ group gCXAFIR filter singlebit, cs_root_cause /** CXAFIR[6] * XPT saw UE on PB data */ - (rCXAFIR, bit(6)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(6)) ? level2_th_1; /** CXAFIR[7] * XPT saw SUE on PB data */ - (rCXAFIR, bit(7)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(7)) ? level2_th_1_SUE; /** CXAFIR[8] * Correctable error on Snooper array. @@ -239,7 +239,7 @@ group gCXAFIR filter singlebit, cs_root_cause /** CXAFIR[13] * Recoverable errors detected in Master */ - (rCXAFIR, bit(13)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(13)) ? level2_th_1; /** CXAFIR[14] * spare @@ -317,19 +317,19 @@ group gCXAFIR filter singlebit, cs_root_cause (rCXAFIR, bit(28)) ? defaultMaskedError; /** CXAFIR[29] - * CXA: PB Addr Error detected by APC on load + * CXA: PB Addr Error detected by APC : ld */ (rCXAFIR, bit(29)) ? level2_th_1; /** CXAFIR[30] - * CXA PB Addr Error detected by APC on store + * CXA PB Addr Err detected by APC : st */ (rCXAFIR, bit(30)) ? level2_th_1; /** CXAFIR[31] * CXA: PPHB0 or PHB1 i linkdown */ - (rCXAFIR, bit(31)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(31)) ? level2_th_1; /** CXAFIR[32] * APC ack_dead or ack_ed_dead @@ -359,7 +359,7 @@ group gCXAFIR filter singlebit, cs_root_cause /** CXAFIR[37] * CXA: TLBI Timeout error. */ - (rCXAFIR, bit(37)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(37)) ? level2_th_1; /** CXAFIR[38] * CXA: TLBI seq_err. @@ -399,12 +399,12 @@ group gCXAFIR filter singlebit, cs_root_cause /** CXAFIR[45] * Command_queue_UE */ - (rCXAFIR, bit(45)) ? self_th_1; # NIMBUS_10 + (rCXAFIR, bit(45)) ? self_th_1; /** CXAFIR[46] * PSL credit timeout error */ - (rCXAFIR, bit(46)) ? level2_th_1; # NIMBUS_10 + (rCXAFIR, bit(46)) ? level2_th_1; /** CXAFIR[47] * spare diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule index 7a8f1e6e7..afa279b0a 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule @@ -108,11 +108,11 @@ actionclass threshold_and_mask # Special Flags # ################################################################################ -/** Set SUE generation point */ -actionclass SUEGenerationPoint -{ - flag(UERE); -}; +/** SUE source */ +actionclass SueSource { flag(UERE); }; + +/** SUE originated from somewhere else */ +actionclass SueSeen { flag(SUE); }; ################################################################################ # Simple Callouts # @@ -143,7 +143,6 @@ actionclass calloutSelfLowNoGard # Callouts with thresholds # ################################################################################ - actionclass self_th_1 { calloutSelfMed; @@ -209,6 +208,14 @@ actionclass level2_M_proc_L_th_1 }; ################################################################################ +# Callouts with flags # +################################################################################ + +actionclass level2_th_1_SUE { level2_th_1; SueSeen; }; +actionclass self_th_1_SUE { self_th_1; SueSeen; }; +actionclass self_th_1_UERE { self_th_1; SueSource; }; + +################################################################################ # Dump Types # ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/p9/p9_ec.rule b/src/usr/diag/prdf/common/plat/p9/p9_ec.rule index 78942e0bb..b296f816b 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_ec.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_ec.rule @@ -559,7 +559,7 @@ group gCOREFIR filter singlebit, cs_root_cause /** COREFIR[14] * Machine check and ME = 0 Err */ - (rCOREFIR, bit(14)) ? self_th_1; + (rCOREFIR, bit(14)) ? self_th_1_SUE; /** COREFIR[15] * LSU or IFU detected UE from L2 @@ -754,12 +754,12 @@ group gCOREFIR filter singlebit, cs_root_cause /** COREFIR[57] * Other Core Core Checkstop */ - (rCOREFIR, bit(57)) ? self_th_1; + (rCOREFIR, bit(57)) ? defaultMaskedError; /** COREFIR[58] * Other Core System Checkstop */ - (rCOREFIR, bit(58)) ? self_th_1; + (rCOREFIR, bit(58)) ? defaultMaskedError; /** COREFIR[59] * SCOM error handling diff --git a/src/usr/diag/prdf/common/plat/p9/p9_ex.rule b/src/usr/diag/prdf/common/plat/p9/p9_ex.rule index 7e5daf08d..f785ac25b 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_ex.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_ex.rule @@ -555,7 +555,7 @@ group gNCUFIR filter singlebit, cs_root_cause( 8 ) /** NCUFIR[8] * NCU Store Queue Data Parity Err */ - (rNCUFIR, bit(8)) ? self_th_1; + (rNCUFIR, bit(8)) ? self_th_1_UERE; /** NCUFIR[9] * store timed out on pb diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index 4dd5677ec..4dd93bdb0 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -302,7 +302,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) /** MCACALFIR[13] * Persistent RCD error, port failed */ - (rMCACALFIR, bit(13)) ? mem_port_failure; + (rMCACALFIR, bit(13)) ? mem_port_failure_UERE; /** MCACALFIR[14] * RCD during periodic cal @@ -413,7 +413,7 @@ group gMCAECCFIR filter priority( 14, 17, 37 ), # ensure UEs handled before NCEs /** MCAECCFIR[14] * Mainline read UE */ - (rMCAECCFIR, bit(14)) ? mainline_ue_handling; + (rMCAECCFIR, bit(14)) ? mainline_ue_handling_UERE; /** MCAECCFIR[15] * Mainline read RCD @@ -428,7 +428,7 @@ group gMCAECCFIR filter priority( 14, 17, 37 ), # ensure UEs handled before NCEs /** MCAECCFIR[17] * Mainline read IUE */ - (rMCAECCFIR, bit(17)) ? mainline_iue_handling; + (rMCAECCFIR, bit(17)) ? mainline_iue_handling_UERE; /** MCAECCFIR[18] * Mainline read IRCD @@ -493,7 +493,7 @@ group gMCAECCFIR filter priority( 14, 17, 37 ), # ensure UEs handled before NCEs /** MCAECCFIR[37] * Maintenance IUE */ - (rMCAECCFIR, bit(37)) ? maintenance_iue_handling; + (rMCAECCFIR, bit(37)) ? maintenance_iue_handling_UERE; /** MCAECCFIR[38] * Maintenance IRCD diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index 196a95485..a0a5d6044 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -40,9 +40,9 @@ actionclass mainline_nce_handling { funccall("AnalyzeFetchNce"); }; actionclass mainline_tce_handling { funccall("AnalyzeFetchTce"); }; /** Mainline UE handling */ -actionclass mainline_ue_handling +actionclass mainline_ue_handling_UERE { - SUEGenerationPoint; + SueSource; threshold( field(33 / 30 min ) ); # To prevent flooding. Will be unmasked # when background scrubbing resumes after # targeted diagnostics is complete. @@ -50,8 +50,9 @@ actionclass mainline_ue_handling }; /** Memory Port Failure */ -actionclass mem_port_failure +actionclass mem_port_failure_UERE { + SueSource; callout(connected(TYPE_DIMM,0), MRU_HIGH); # DIMM 0 HIGH callout(connected(TYPE_DIMM,1), MRU_HIGH); # DIMM 1 HIGH calloutSelfLow; # Self LOW @@ -70,17 +71,17 @@ actionclass rcd_parity_error }; /** Handle Mainline IUEs */ -actionclass mainline_iue_handling +actionclass mainline_iue_handling_UERE { - SUEGenerationPoint; + SueSource; # Thresholding done in the plugin funccall("AnalyzeMainlineIue"); }; /** Handle Maintenance IUEs */ -actionclass maintenance_iue_handling +actionclass maintenance_iue_handling_UERE { - SUEGenerationPoint; + SueSource; # Thresholding done in the plugin funccall("AnalyzeMaintIue"); }; diff --git a/src/usr/diag/prdf/common/plat/p9/p9_nimbus.rule b/src/usr/diag/prdf/common/plat/p9/p9_nimbus.rule index b23d76822..981e59552 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_nimbus.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_nimbus.rule @@ -824,78 +824,6 @@ chip p9_nimbus }; ############################################################################ - # P9 chip NPU0FIR - ############################################################################ - - register NPU0FIR - { - name "P9 chip NPU0FIR"; - scomaddr 0x05013C00; - reset (&, 0x05013C01); - mask (|, 0x05013C05); - capture group cNPU0FIR; - }; - - register NPU0FIR_MASK - { - name "P9 chip NPU0FIR MASK"; - scomaddr 0x05013C03; - capture group cNPU0FIR; - }; - - register NPU0FIR_ACT0 - { - name "P9 chip NPU0FIR ACT0"; - scomaddr 0x05013C06; - capture group cNPU0FIR; - capture req nonzero("NPU0FIR"); - }; - - register NPU0FIR_ACT1 - { - name "P9 chip NPU0FIR ACT1"; - scomaddr 0x05013C07; - capture group cNPU0FIR; - capture req nonzero("NPU0FIR"); - }; - - ############################################################################ - # P9 chip NPU1FIR - ############################################################################ - - register NPU1FIR - { - name "P9 chip NPU1FIR"; - scomaddr 0x05013C40; - reset (&, 0x05013C41); - mask (|, 0x05013C45); - capture group cNPU1FIR; - }; - - register NPU1FIR_MASK - { - name "P9 chip NPU1FIR MASK"; - scomaddr 0x05013C43; - capture group cNPU1FIR; - }; - - register NPU1FIR_ACT0 - { - name "P9 chip NPU1FIR ACT0"; - scomaddr 0x05013C46; - capture group cNPU1FIR; - capture req nonzero("NPU1FIR"); - }; - - register NPU1FIR_ACT1 - { - name "P9 chip NPU1FIR ACT1"; - scomaddr 0x05013C47; - capture group cNPU1FIR; - capture req nonzero("NPU1FIR"); - }; - - ############################################################################ # P9 chip PBWESTFIR ############################################################################ @@ -1371,6 +1299,78 @@ chip p9_nimbus }; ############################################################################ + # P9 chip NPU0FIR + ############################################################################ + + register NPU0FIR + { + name "P9 chip NPU0FIR"; + scomaddr 0x05013C00; + reset (&, 0x05013C01); + mask (|, 0x05013C05); + capture group cNPU0FIR; + }; + + register NPU0FIR_MASK + { + name "P9 chip NPU0FIR MASK"; + scomaddr 0x05013C03; + capture group cNPU0FIR; + }; + + register NPU0FIR_ACT0 + { + name "P9 chip NPU0FIR ACT0"; + scomaddr 0x05013C06; + capture group cNPU0FIR; + capture req nonzero("NPU0FIR"); + }; + + register NPU0FIR_ACT1 + { + name "P9 chip NPU0FIR ACT1"; + scomaddr 0x05013C07; + capture group cNPU0FIR; + capture req nonzero("NPU0FIR"); + }; + + ############################################################################ + # P9 chip NPU1FIR + ############################################################################ + + register NPU1FIR + { + name "P9 chip NPU1FIR"; + scomaddr 0x05013C40; + reset (&, 0x05013C41); + mask (|, 0x05013C45); + capture group cNPU1FIR; + }; + + register NPU1FIR_MASK + { + name "P9 chip NPU1FIR MASK"; + scomaddr 0x05013C43; + capture group cNPU1FIR; + }; + + register NPU1FIR_ACT0 + { + name "P9 chip NPU1FIR ACT0"; + scomaddr 0x05013C46; + capture group cNPU1FIR; + capture req nonzero("NPU1FIR"); + }; + + register NPU1FIR_ACT1 + { + name "P9 chip NPU1FIR ACT1"; + scomaddr 0x05013C47; + capture group cNPU1FIR; + capture req nonzero("NPU1FIR"); + }; + + ############################################################################ # XB Chiplet FIR ############################################################################ @@ -3010,17 +3010,17 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[0] * PBI internal parity error */ - (rNXCQFIR, bit(0)) ? self_th_1; + (rNXCQFIR, bit(0)) ? nx_th_1; /** NXCQFIR[1] * PowerBus CE error */ - (rNXCQFIR, bit(1)) ? self_th_32perDay; + (rNXCQFIR, bit(1)) ? nx_th_32perDay; /** NXCQFIR[2] * PowerBus UE error */ - (rNXCQFIR, bit(2)) ? self_th_1; + (rNXCQFIR, bit(2)) ? nx_th_1; /** NXCQFIR[3] * PBUS_ECC_SUE_FIR: PowerBus SUE error @@ -3030,12 +3030,12 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[4] * Inbound array CE error */ - (rNXCQFIR, bit(4)) ? self_th_32perDay; + (rNXCQFIR, bit(4)) ? nx_th_32perDay; /** NXCQFIR[5] * Inbound array UE error */ - (rNXCQFIR, bit(5)) ? self_th_1; + (rNXCQFIR, bit(5)) ? nx_th_1; /** NXCQFIR[6] * PASTE_REJECT_FIR: Paste request rejected @@ -3050,27 +3050,27 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[8] * PowerBus read address error */ - (rNXCQFIR, bit(8)) ? self_th_1; + (rNXCQFIR, bit(8)) ? nx_th_1; /** NXCQFIR[9] * PowerBus write address error */ - (rNXCQFIR, bit(9)) ? self_th_1; + (rNXCQFIR, bit(9)) ? nx_th_1; /** NXCQFIR[10] * PowerBus miscellaneous error */ - (rNXCQFIR, bit(10)) ? self_th_1; + (rNXCQFIR, bit(10)) ? nx_th_1; /** NXCQFIR[11] * MMIO_BAR_PE_FIR: MMIO BAR parity error */ - (rNXCQFIR, bit(11)) ? self_th_1; + (rNXCQFIR, bit(11)) ? nx_th_1; /** NXCQFIR[12] * UMAC detected UE on WC Interrupt */ - (rNXCQFIR, bit(12)) ? self_th_1; # NIMBUS_10 + (rNXCQFIR, bit(12)) ? nx_th_1; /** NXCQFIR[13] * ACK_DEAD cresp received by read command @@ -3090,27 +3090,27 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[16] * internal transfer hang poll time expired */ - (rNXCQFIR, bit(16)) ? self_th_1; + (rNXCQFIR, bit(16)) ? nx_th_1; /** NXCQFIR[17] * Parity error on ERAT arrays */ - (rNXCQFIR, bit(17)) ? self_th_1; + (rNXCQFIR, bit(17)) ? nx_th_1; /** NXCQFIR[18] * Correctable error on ERAT arrays */ - (rNXCQFIR, bit(18)) ? self_th_32perDay; + (rNXCQFIR, bit(18)) ? nx_th_32perDay; /** NXCQFIR[19] * Uncorrectable error on ERAT arrays */ - (rNXCQFIR, bit(19)) ? self_th_32perDay; + (rNXCQFIR, bit(19)) ? nx_th_32perDay; /** NXCQFIR[20] * SUE on ERAT arrays */ - (rNXCQFIR, bit(20)) ? defaultMaskedError; + (rNXCQFIR, bit(20)) ? nx_th_1_SUE; /** NXCQFIR[21] * NMMU hang on checkin/checkout request @@ -3120,17 +3120,17 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[22] * ERAT control logic error */ - (rNXCQFIR, bit(22)) ? self_th_1; + (rNXCQFIR, bit(22)) ? nx_th_1; /** NXCQFIR[23] * UEon the Powerbus data for xlate */ - (rNXCQFIR, bit(23)) ? self_th_1; + (rNXCQFIR, bit(23)) ? nx_th_1; /** NXCQFIR[24] * SUE on the Powerbus data for xlate */ - (rNXCQFIR, bit(24)) ? defaultMaskedError; + (rNXCQFIR, bit(24)) ? nx_th_1_SUE; /** NXCQFIR[25] * ACK_DEAD cresp received by UMAC read @@ -3145,17 +3145,17 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[27] * UE on CRB QW0/4 */ - (rNXCQFIR, bit(27)) ? self_th_1; + (rNXCQFIR, bit(27)) ? nx_th_1; /** NXCQFIR[28] * SUE on CRB QW0/4 */ - (rNXCQFIR, bit(28)) ? defaultMaskedError; + (rNXCQFIR, bit(28)) ? nx_th_1_SUE; /** NXCQFIR[29] * UMAC has detected a control logic error */ - (rNXCQFIR, bit(29)) ? self_th_1; + (rNXCQFIR, bit(29)) ? nx_th_1; /** NXCQFIR[30] * Reserved fieldUMAC_SCOM_sat_err) @@ -3170,32 +3170,32 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[32] * first noise source in the RNG has failed */ - (rNXCQFIR, bit(32)) ? self_th_32perDay; + (rNXCQFIR, bit(32)) ? nx_th_32perDay; /** NXCQFIR[33] * second noise source in the RNG failed */ - (rNXCQFIR, bit(33)) ? self_th_32perDay; + (rNXCQFIR, bit(33)) ? nx_th_32perDay; /** NXCQFIR[34] * RNG has detected a control logic error */ - (rNXCQFIR, bit(34)) ? self_th_1; + (rNXCQFIR, bit(34)) ? nx_th_1; /** NXCQFIR[35] * NMMU has signaled local checkstop */ - (rNXCQFIR, bit(35)) ? self_th_1; + (rNXCQFIR, bit(35)) ? nx_th_1; /** NXCQFIR[36] * VAS has signaled local checkstop */ - (rNXCQFIR, bit(36)) ? self_th_1; + (rNXCQFIR, bit(36)) ? nx_th_1; /** NXCQFIR[37] * PBCQ has detected a control logic error */ - (rNXCQFIR, bit(37)) ? self_th_1; + (rNXCQFIR, bit(37)) ? nx_th_1; /** NXCQFIR[38] * PBCQ detected failed link @@ -3205,12 +3205,17 @@ group gNXCQFIR filter singlebit, cs_root_cause /** NXCQFIR[39] * UMAC detected SUE on WC Interrupt */ - (rNXCQFIR, bit(39)) ? defaultMaskedError; + (rNXCQFIR, bit(39)) ? nx_th_1_SUE; + + /** NXCQFIR[40] + * SMF address bit = 1 error + */ + (rNXCQFIR, bit(40)) ? defaultMaskedError; - /** NXCQFIR[40:41] - * Reserved field (Access type is spares) + /** NXCQFIR[41] + * Reserved */ - (rNXCQFIR, bit(40|41)) ? defaultMaskedError; + (rNXCQFIR, bit(41)) ? defaultMaskedError; /** NXCQFIR[42] * scom error @@ -3243,12 +3248,12 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[0] * DMA hang timer expired */ - (rNXDMAENGFIR, bit(0)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(0)) ? nx_th_1; /** NXDMAENGFIR[1] * SHM invalid state */ - (rNXDMAENGFIR, bit(1)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(1)) ? nx_th_1; /** NXDMAENGFIR[2:3] * spare @@ -3258,22 +3263,22 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[4] * Channel 0 842 engine ECC CE error */ - (rNXDMAENGFIR, bit(4)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(4)) ? nx_th_32perDay; /** NXDMAENGFIR[5] * Channel 0 842 engine ECC UE error */ - (rNXDMAENGFIR, bit(5)) ? self_th_1; + (rNXDMAENGFIR, bit(5)) ? nx_th_1; /** NXDMAENGFIR[6] * Channel 1 842 engine ECC CE error */ - (rNXDMAENGFIR, bit(6)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(6)) ? nx_th_32perDay; /** NXDMAENGFIR[7] * Channel 1 842 engine ECC UE error */ - (rNXDMAENGFIR, bit(7)) ? self_th_1; + (rNXDMAENGFIR, bit(7)) ? nx_th_1; /** NXDMAENGFIR[8] * DMA Non-zero CSB CC detected @@ -3293,17 +3298,17 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[11] * Channel 4 GZIP ECC CE */ - (rNXDMAENGFIR, bit(11)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(11)) ? nx_th_32perDay; /** NXDMAENGFIR[12] * Channel 4 GZIP ECC UE */ - (rNXDMAENGFIR, bit(12)) ? self_th_1; + (rNXDMAENGFIR, bit(12)) ? nx_th_1; /** NXDMAENGFIR[13] * Channel 4 GZIP ECC PE */ - (rNXDMAENGFIR, bit(13)) ? self_th_1; + (rNXDMAENGFIR, bit(13)) ? nx_th_1; /** NXDMAENGFIR[14] * SCOM error from other satellites @@ -3313,22 +3318,22 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[15] * DMA invalid state error (unrecoverable) */ - (rNXDMAENGFIR, bit(15)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(15)) ? nx_th_1; /** NXDMAENGFIR[16] * DMA invalid state error (unrecoverable) */ - (rNXDMAENGFIR, bit(16)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(16)) ? nx_th_1; /** NXDMAENGFIR[17] * DMA array ECC UE error */ - (rNXDMAENGFIR, bit(17)) ? self_th_1; + (rNXDMAENGFIR, bit(17)) ? nx_th_1; /** NXDMAENGFIR[18] * DMA outWR/inRD ECC UE error */ - (rNXDMAENGFIR, bit(18)) ? self_th_1; + (rNXDMAENGFIR, bit(18)) ? nx_th_1; /** NXDMAENGFIR[19] * DMA inRD done error @@ -3338,27 +3343,27 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[20] * Channel 0 invalid state error */ - (rNXDMAENGFIR, bit(20)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(20)) ? nx_th_1; /** NXDMAENGFIR[21] * Channel 1 invalid state error */ - (rNXDMAENGFIR, bit(21)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(21)) ? nx_th_1; /** NXDMAENGFIR[22] * Channel 2 invalid state error */ - (rNXDMAENGFIR, bit(22)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(22)) ? nx_th_1; /** NXDMAENGFIR[23] * Channel 3 invalid state error */ - (rNXDMAENGFIR, bit(23)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(23)) ? nx_th_1; /** NXDMAENGFIR[24] * Channel 4 invalid state error */ - (rNXDMAENGFIR, bit(24)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(24)) ? nx_th_1; /** NXDMAENGFIR[25:30] * spare @@ -3368,12 +3373,12 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[31] * CRB UE, on CSB/CCB */ - (rNXDMAENGFIR, bit(31)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(31)) ? nx_th_1; /** NXDMAENGFIR[32] * CRB SUE, on CSB/CCB */ - (rNXDMAENGFIR, bit(32)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(32)) ? nx_th_1_SUE; /** NXDMAENGFIR[33] * DMA outWR/inRD ECC SUE error @@ -3383,32 +3388,32 @@ group gNXDMAENGFIR filter singlebit, cs_root_cause /** NXDMAENGFIR[34] * Channel 0 watchdog timer expired */ - (rNXDMAENGFIR, bit(34)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(34)) ? nx_th_32perDay; /** NXDMAENGFIR[35] * Channel 1 watchdog timer expired */ - (rNXDMAENGFIR, bit(35)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(35)) ? nx_th_32perDay; /** NXDMAENGFIR[36] * Channel 2 watchdog timer expired */ - (rNXDMAENGFIR, bit(36)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(36)) ? nx_th_32perDay; /** NXDMAENGFIR[37] * Channel 3 watchdog timer expired */ - (rNXDMAENGFIR, bit(37)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(37)) ? nx_th_32perDay; /** NXDMAENGFIR[38] * Hypervisor local checkstop */ - (rNXDMAENGFIR, bit(38)) ? self_th_1; # NIMBUS_10 + (rNXDMAENGFIR, bit(38)) ? nx_th_1; /** NXDMAENGFIR[39] * Channel 4 watchdog timer expired */ - (rNXDMAENGFIR, bit(39)) ? self_th_32perDay; + (rNXDMAENGFIR, bit(39)) ? nx_th_32perDay; /** NXDMAENGFIR[40:47] * spare @@ -3968,27 +3973,27 @@ group gVASFIR filter singlebit, cs_root_cause /** VASFIR[24] * EG Special UE (SUE) Error */ - (rVASFIR, bit(24)) ? self_th_1; # NIMBUS_10 + (rVASFIR, bit(24)) ? self_th_1_SUE; # NIMBUS_10 /** VASFIR[25] * IN SUE Error */ - (rVASFIR, bit(25)) ? self_th_1; # NIMBUS_10 + (rVASFIR, bit(25)) ? self_th_1_SUE; # NIMBUS_10 /** VASFIR[26] * PB SUE Error */ - (rVASFIR, bit(26)) ? self_th_1; # NIMBUS_10 + (rVASFIR, bit(26)) ? self_th_1_SUE; # NIMBUS_10 /** VASFIR[27] * WC SUE Error */ - (rVASFIR, bit(27)) ? self_th_1; # NIMBUS_10 + (rVASFIR, bit(27)) ? self_th_1_SUE; # NIMBUS_10 /** VASFIR[28] * RG/MMIO SUE Error */ - (rVASFIR, bit(28)) ? self_th_1; # NIMBUS_10 + (rVASFIR, bit(28)) ? self_th_1_SUE; # NIMBUS_10 /** VASFIR[29] * PB Link Error on Read @@ -4663,17 +4668,17 @@ group gN3_LFIR filter singlebit, cs_root_cause (rN3_LFIR, bit(26|27|28|29|30|31)) ? defaultMaskedError; /** N3_LFIR[32] - * FW asserted failsafe timer + * deadman timer expired */ (rN3_LFIR, bit(32)) ? self_th_1; /** N3_LFIR[33] - * FW asserted system quisce + * system quiesce failed */ (rN3_LFIR, bit(33)) ? self_th_1; /** N3_LFIR[34] - * FW asserted Chip Quisce + * chip quiesce failed */ (rN3_LFIR, bit(34)) ? self_th_1; @@ -4693,492 +4698,6 @@ group gN3_LFIR filter singlebit, cs_root_cause # P9 chip NPU0FIR ################################################################################ -rule rNPU0FIR -{ - CHECK_STOP: - NPU0FIR & ~NPU0FIR_MASK & ~NPU0FIR_ACT0 & ~NPU0FIR_ACT1; - RECOVERABLE: - NPU0FIR & ~NPU0FIR_MASK & ~NPU0FIR_ACT0 & NPU0FIR_ACT1; - UNIT_CS: - NPU0FIR & ~NPU0FIR_MASK & NPU0FIR_ACT0 & NPU0FIR_ACT1; -}; - -group gNPU0FIR filter singlebit, cs_root_cause -{ - /** NPU0FIR[0] - * NTL array CE - */ - (rNPU0FIR, bit(0)) ? self_th_32perDay; - - /** NPU0FIR[1] - * NTL header array UE - */ - (rNPU0FIR, bit(1)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[2] - * NTL Data Array UE - */ - (rNPU0FIR, bit(2)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[3] - * NTL NVLInk Control/Header/AE PE - */ - (rNPU0FIR, bit(3)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[4] - * NTL NVLink Data Parity error - */ - (rNPU0FIR, bit(4)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[5] - * NTL NVLink Malformed Packet - */ - (rNPU0FIR, bit(5)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[6] - * NTL NVLink Unsupported Packet - */ - (rNPU0FIR, bit(6)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[7] - * NTL NVLink Config errors - */ - (rNPU0FIR, bit(7)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[8] - * NTL NVLink CRC errors or LMD=Stomp - */ - (rNPU0FIR, bit(8)) ? defaultMaskedError; - - /** NPU0FIR[9] - * NTL PRI errors - */ - (rNPU0FIR, bit(9)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[10] - * NTL logic error - */ - (rNPU0FIR, bit(10)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[11] - * NTL LMD=Data Posion - */ - (rNPU0FIR, bit(11)) ? defaultMaskedError; - - /** NPU0FIR[12] - * NTL data array SUE - */ - (rNPU0FIR, bit(12)) ? defaultMaskedError; - - /** NPU0FIR[13] - * CQ CTL/SM ASBE Array single-bit CE - */ - (rNPU0FIR, bit(13)) ? self_th_32perDay; - - /** NPU0FIR[14] - * CQ CTL/SM PBR PowerBus Recoverable err - */ - (rNPU0FIR, bit(14)) ? defaultMaskedError; - - /** NPU0FIR[15] - * CQ CTL/SM REG Register ring error - */ - (rNPU0FIR, bit(15)) ? self_th_32perDay; - - /** NPU0FIR[16] - * Data UE for MMIO store data - */ - (rNPU0FIR, bit(16)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[17] - * spare - */ - (rNPU0FIR, bit(17)) ? defaultMaskedError; - - /** NPU0FIR[18] - * CQ CTL/SM NCF NVLink config error - */ - (rNPU0FIR, bit(18)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[19] - * CQ CTL/SM NVF NVLink fatal error - */ - (rNPU0FIR, bit(19)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[20] - * spare - */ - (rNPU0FIR, bit(20)) ? defaultMaskedError; - - /** NPU0FIR[21] - * CQ CTL/SM AUE Array UE - */ - (rNPU0FIR, bit(21)) ? self_th_1; - - /** NPU0FIR[22] - * CQ CTL/SM PBP PowerBus parity error - */ - (rNPU0FIR, bit(22)) ? self_th_1; - - /** NPU0FIR[23] - * CQ CTL/SM PBF PowerBus Fatal Error - */ - (rNPU0FIR, bit(23)) ? level2_M_self_L_th_1; - - /** NPU0FIR[24] - * PowerBus configuration error - */ - (rNPU0FIR, bit(24)) ? level2_M_self_L_th_1; - - /** NPU0FIR[25] - * CQ CTL/SM FWD Forward-Progress error - */ - (rNPU0FIR, bit(25)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[26] - * CQ CTL/SM NLG NPU Logic error - */ - (rNPU0FIR, bit(26)) ? self_th_1; - - /** NPU0FIR[27] - * CQ CTL/SM UT=1 to frozen PE error - */ - (rNPU0FIR, bit(27)) ? defaultMaskedError; - - /** NPU0FIR[28] - * spare - */ - (rNPU0FIR, bit(28)) ? defaultMaskedError; - - /** NPU0FIR[29] - * CQ DAT ECC UE/SUE on data/BE arrays - */ - (rNPU0FIR, bit(29)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[30] - * CQ DAT ECC CE on data/BE arrays - */ - (rNPU0FIR, bit(30)) ? self_M_level2_L_th_32perDay; - - /** NPU0FIR[31] - * CQ DAT parity error on data/BE latches - */ - (rNPU0FIR, bit(31)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[32] - * CQ DAT parity errs on config regs - */ - (rNPU0FIR, bit(32)) ? self_th_1; - - /** NPU0FIR[33] - * CQ DAT parity errs/PowerBus rtag - */ - (rNPU0FIR, bit(33)) ? self_th_1; - - /** NPU0FIR[34] - * CQ DAT parity errs nternal state latches - */ - (rNPU0FIR, bit(34)) ? self_th_1; - - /** NPU0FIR[35] - * CQ DAT logic error - */ - (rNPU0FIR, bit(35)) ? self_th_1; - - /** NPU0FIR[36] - * Future SUE - */ - (rNPU0FIR, bit(36)) ? defaultMaskedError; - - /** NPU0FIR[37] - * ECC SUE on PB received data - */ - (rNPU0FIR, bit(37)) ? defaultMaskedError; - - /** NPU0FIR[38:39] - * spare - */ - (rNPU0FIR, bit(38|39)) ? defaultMaskedError; - - /** NPU0FIR[40] - * XTS internal logic error - */ - (rNPU0FIR, bit(40)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[41] - * XTS correctable errs in XTS SRAM - */ - (rNPU0FIR, bit(41)) ? self_M_level2_L_th_32perDay; - - /** NPU0FIR[42] - * XTS Ues in XTS internal SRAM - */ - (rNPU0FIR, bit(42)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[43] - * XTS CE on incoming stack transactions - */ - (rNPU0FIR, bit(43)) ? self_M_level2_L_th_32perDay; - - /** NPU0FIR[44] - * XTS errs incoming stack transaction - */ - (rNPU0FIR, bit(44)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[45] - * XTS errs on incoming PBUS transaction - */ - (rNPU0FIR, bit(45)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[46] - * XTS Translate Request Fail - */ - (rNPU0FIR, bit(46)) ? self_th_1; # NIMBUS_10 - - /** NPU0FIR[47:59] - * spare - */ - (rNPU0FIR, bit(47|48|49|50|51|52|53|54|55|56|57|58|59)) ? defaultMaskedError; - - /** NPU0FIR[60] - * MISC Pervasive SCOM satellite err - */ - (rNPU0FIR, bit(60)) ? defaultMaskedError; - - /** NPU0FIR[61] - * MISC Pervasive SCOM satellite err - */ - (rNPU0FIR, bit(61)) ? defaultMaskedError; - - /** NPU0FIR[62] - * Local FIR Parity Error RAS duplicate - */ - (rNPU0FIR, bit(62)) ? defaultMaskedError; - - /** NPU0FIR[63] - * Local FIR Parity Err - */ - (rNPU0FIR, bit(63)) ? defaultMaskedError; - -}; - -################################################################################ -# P9 chip NPU1FIR -################################################################################ - -rule rNPU1FIR -{ - CHECK_STOP: - NPU1FIR & ~NPU1FIR_MASK & ~NPU1FIR_ACT0 & ~NPU1FIR_ACT1; - RECOVERABLE: - NPU1FIR & ~NPU1FIR_MASK & ~NPU1FIR_ACT0 & NPU1FIR_ACT1; - UNIT_CS: - NPU1FIR & ~NPU1FIR_MASK & NPU1FIR_ACT0 & NPU1FIR_ACT1; -}; - -group gNPU1FIR filter singlebit, cs_root_cause -{ - /** NPU1FIR[0] - * NDL Brick0 stall - */ - (rNPU1FIR, bit(0)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[1] - * NDL Brick0 nostall - */ - (rNPU1FIR, bit(1)) ? defaultMaskedError; - - /** NPU1FIR[2] - * NDL Brick1 stall - */ - (rNPU1FIR, bit(2)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[3] - * NDL Brick1 nostall - */ - (rNPU1FIR, bit(3)) ? defaultMaskedError; - - /** NPU1FIR[4] - * NDL Brick2 stall - */ - (rNPU1FIR, bit(4)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[5] - * NDL Brick2 nostall - */ - (rNPU1FIR, bit(5)) ? defaultMaskedError; - - /** NPU1FIR[6] - * NDL Brick3 stall - */ - (rNPU1FIR, bit(6)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[7] - * NDL Brick3 nostall - */ - (rNPU1FIR, bit(7)) ? defaultMaskedError; - - /** NPU1FIR[8] - * NDL Brick4 stall - */ - (rNPU1FIR, bit(8)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[9] - * NDL Brick4 nostall - */ - (rNPU1FIR, bit(9)) ? defaultMaskedError; - - /** NPU1FIR[10] - * NDL Brick5 stall - */ - (rNPU1FIR, bit(10)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[11] - * NDL Brick5 nostall - */ - (rNPU1FIR, bit(11)) ? defaultMaskedError; - - /** NPU1FIR[12] - * MISC Register ring error (ie noack) - */ - (rNPU1FIR, bit(12)) ? self_th_32perDay; - - /** NPU1FIR[13] - * MISC Parity error from ibr addr regi - */ - (rNPU1FIR, bit(13)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[14] - * MISC Parity error on SCOM D/A addr reg - */ - (rNPU1FIR, bit(14)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[15] - * MISC Parity error on MISC Cntrl reg - */ - (rNPU1FIR, bit(15)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[16] - * MISC NMMU signaled Local Checkstop - */ - (rNPU1FIR, bit(16)) ? defaultMaskedError; - - /** NPU1FIR[17] - * ATS Invalid TVT entry - */ - (rNPU1FIR, bit(17)) ? defaultMaskedError; - - /** NPU1FIR[18] - * ATS TVT Address range error - */ - (rNPU1FIR, bit(18)) ? defaultMaskedError; - - /** NPU1FIR[19] - * ATS TCE Page access error - */ - (rNPU1FIR, bit(19)) ? defaultMaskedError; - - /** NPU1FIR[20] - * ATS Effective Address hit multiple TCE - */ - (rNPU1FIR, bit(20)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[21] - * ATS TCE Page access error - */ - (rNPU1FIR, bit(21)) ? defaultMaskedError; - - /** NPU1FIR[22] - * ATS Timeout on TCE tree walk - */ - (rNPU1FIR, bit(22)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[23] - * ATS Parity error on TCE cache dir array - */ - (rNPU1FIR, bit(23)) ? self_th_32perDay; - - /** NPU1FIR[24] - * ATS Parity error on TCE cache data array - */ - (rNPU1FIR, bit(24)) ? self_th_32perDay; - - /** NPU1FIR[25] - * ATS ECC UE on Effective Address array - */ - (rNPU1FIR, bit(25)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[26] - * ATS ECC CE on Effective Address array - */ - (rNPU1FIR, bit(26)) ? self_th_32perDay; - - /** NPU1FIR[27] - * ATS ECC UE on TDRmem array - */ - (rNPU1FIR, bit(27)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[28] - * ATS ECC CE on TDRmem array - */ - (rNPU1FIR, bit(28)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[29] - * ATS ECC UE on CQ CTL DMA Read - */ - (rNPU1FIR, bit(29)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[30] - * ATS ECC CE on CQ CTL DMA Read - */ - (rNPU1FIR, bit(30)) ? self_th_32perDay; - - /** NPU1FIR[31] - * ATS Parity error on TVT entry - */ - (rNPU1FIR, bit(31)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[32] - * ATS Parity err on IODA Address Reg - */ - (rNPU1FIR, bit(32)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[33] - * ATS Parity error on ATS Control Register - */ - (rNPU1FIR, bit(33)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[34] - * ATS Parity error on ATS reg - */ - (rNPU1FIR, bit(34)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[35] - * ATS Invalid IODA Table Select entry - */ - (rNPU1FIR, bit(35)) ? self_th_1; # NIMBUS_10 - - /** NPU1FIR[36:61] - * Reserved - */ - (rNPU1FIR, bit(36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61)) ? defaultMaskedError; - - /** NPU1FIR[62] - * scom error - */ - (rNPU1FIR, bit(62)) ? defaultMaskedError; - - /** NPU1FIR[63] - * scom error - */ - (rNPU1FIR, bit(63)) ? defaultMaskedError; - -}; - -################################################################################ -# P9 chip NPU0FIR -################################################################################ - rule rNPU0FIR_NDD10 { CHECK_STOP: @@ -6590,12 +6109,12 @@ group gPBAMFIR filter singlebit, cs_root_cause /** PBAMFIR[3] * action0_for_OPB_error */ - (rPBAMFIR, bit(3)) ? self_th_1; + (rPBAMFIR, bit(3)) ? defaultMaskedError; /** PBAMFIR[4] * action0_for_OPB_timeout */ - (rPBAMFIR, bit(4)) ? self_th_32perDay; + (rPBAMFIR, bit(4)) ? defaultMaskedError; /** PBAMFIR[5] * action0_for_OPB_master_hang_timeout @@ -6683,7 +6202,7 @@ group gNMMUCQFIR filter singlebit, cs_root_cause /** NMMUCQFIR[7] * PowerBus Xlate SUE error */ - (rNMMUCQFIR, bit(7)) ? self_th_1; + (rNMMUCQFIR, bit(7)) ? self_th_1_SUE; /** NMMUCQFIR[8] * PowerBus CE error @@ -6786,7 +6305,7 @@ group gNMMUFIR filter singlebit, cs_root_cause /** NMMUFIR[2] * Fabric DIn xlat array SUE error */ - (rNMMUFIR, bit(2)) ? self_th_1; + (rNMMUFIR, bit(2)) ? self_th_1_SUE; /** NMMUFIR[3] * Fabric mst rd array CE error detected. @@ -7042,7 +6561,7 @@ group gINTCQFIR filter singlebit, cs_root_cause /** INTCQFIR[2] * INT_CQ_FIR_PI_ECC_SUE: */ - (rINTCQFIR, bit(2)) ? self_th_1; + (rINTCQFIR, bit(2)) ? self_th_1_SUE; /** INTCQFIR[3] * INT_CQ_FIR_ST_ECC_CE: @@ -7358,7 +6877,7 @@ group gPBIOEFIR filter singlebit, cs_root_cause( 8, 11, 14 ) /** PBIOEFIR[8] * dob01 ue */ - (rPBIOEFIR, bit(8)) ? self_th_1; + (rPBIOEFIR, bit(8)) ? self_th_1_UERE; /** PBIOEFIR[9] * d0b01 ce @@ -7373,7 +6892,7 @@ group gPBIOEFIR filter singlebit, cs_root_cause( 8, 11, 14 ) /** PBIOEFIR[11] * dob23 ue */ - (rPBIOEFIR, bit(11)) ? self_th_1; + (rPBIOEFIR, bit(11)) ? self_th_1_UERE; /** PBIOEFIR[12] * dob23 ce @@ -7388,7 +6907,7 @@ group gPBIOEFIR filter singlebit, cs_root_cause( 8, 11, 14 ) /** PBIOEFIR[14] * dob45 ue */ - (rPBIOEFIR, bit(14)) ? self_th_1; + (rPBIOEFIR, bit(14)) ? self_th_1_UERE; /** PBIOEFIR[15] * dob45 ce @@ -7659,7 +7178,7 @@ group gPBIOOFIR filter singlebit, cs_root_cause( 8, 11, 14, 17 ) /** PBIOOFIR[8] * dob01 ue */ - (rPBIOOFIR, bit(8)) ? self_th_1; + (rPBIOOFIR, bit(8)) ? self_th_1_UERE; /** PBIOOFIR[9] * dob01 ce @@ -7674,7 +7193,7 @@ group gPBIOOFIR filter singlebit, cs_root_cause( 8, 11, 14, 17 ) /** PBIOOFIR[11] * dob23 ue */ - (rPBIOOFIR, bit(11)) ? self_th_1; + (rPBIOOFIR, bit(11)) ? self_th_1_UERE; /** PBIOOFIR[12] * dob23 ce @@ -7689,7 +7208,7 @@ group gPBIOOFIR filter singlebit, cs_root_cause( 8, 11, 14, 17 ) /** PBIOOFIR[14] * dob45 ue */ - (rPBIOOFIR, bit(14)) ? self_th_1; + (rPBIOOFIR, bit(14)) ? self_th_1_UERE; /** PBIOOFIR[15] * dob45 ce @@ -7704,7 +7223,7 @@ group gPBIOOFIR filter singlebit, cs_root_cause( 8, 11, 14, 17 ) /** PBIOOFIR[17] * dob67 ue */ - (rPBIOOFIR, bit(17)) ? self_th_1; + (rPBIOOFIR, bit(17)) ? self_th_1_UERE; /** PBIOOFIR[18] * dob67 ce @@ -7934,6 +7453,492 @@ group gPBIOOFIR filter singlebit, cs_root_cause( 8, 11, 14, 17 ) }; ################################################################################ +# P9 chip NPU0FIR +################################################################################ + +rule rNPU0FIR +{ + CHECK_STOP: + NPU0FIR & ~NPU0FIR_MASK & ~NPU0FIR_ACT0 & ~NPU0FIR_ACT1; + RECOVERABLE: + NPU0FIR & ~NPU0FIR_MASK & ~NPU0FIR_ACT0 & NPU0FIR_ACT1; + UNIT_CS: + NPU0FIR & ~NPU0FIR_MASK & NPU0FIR_ACT0 & NPU0FIR_ACT1; +}; + +group gNPU0FIR filter singlebit, cs_root_cause +{ + /** NPU0FIR[0] + * NTL array CE + */ + (rNPU0FIR, bit(0)) ? self_th_32perDay; + + /** NPU0FIR[1] + * NTL header array UE + */ + (rNPU0FIR, bit(1)) ? self_th_1; + + /** NPU0FIR[2] + * NTL Data Array UE + */ + (rNPU0FIR, bit(2)) ? self_th_1; + + /** NPU0FIR[3] + * NTL NVLInk Control/Header/AE PE + */ + (rNPU0FIR, bit(3)) ? self_th_1; + + /** NPU0FIR[4] + * NTL NVLink Data Parity error + */ + (rNPU0FIR, bit(4)) ? self_th_1; + + /** NPU0FIR[5] + * NTL NVLink Malformed Packet + */ + (rNPU0FIR, bit(5)) ? self_th_1; + + /** NPU0FIR[6] + * NTL NVLink Unsupported Packet + */ + (rNPU0FIR, bit(6)) ? self_th_1; + + /** NPU0FIR[7] + * NTL NVLink Config errors + */ + (rNPU0FIR, bit(7)) ? self_th_1; + + /** NPU0FIR[8] + * NTL NVLink CRC errors or LMD=Stomp + */ + (rNPU0FIR, bit(8)) ? defaultMaskedError; + + /** NPU0FIR[9] + * NTL PRI errors + */ + (rNPU0FIR, bit(9)) ? self_th_1; + + /** NPU0FIR[10] + * NTL logic error + */ + (rNPU0FIR, bit(10)) ? self_th_1; + + /** NPU0FIR[11] + * NTL LMD=Data Posion + */ + (rNPU0FIR, bit(11)) ? defaultMaskedError; + + /** NPU0FIR[12] + * NTL data array SUE + */ + (rNPU0FIR, bit(12)) ? defaultMaskedError; + + /** NPU0FIR[13] + * CQ CTL/SM ASBE Array single-bit CE + */ + (rNPU0FIR, bit(13)) ? self_th_32perDay; + + /** NPU0FIR[14] + * CQ CTL/SM PBR PowerBus Recoverable err + */ + (rNPU0FIR, bit(14)) ? defaultMaskedError; + + /** NPU0FIR[15] + * CQ CTL/SM REG Register ring error + */ + (rNPU0FIR, bit(15)) ? self_th_32perDay; + + /** NPU0FIR[16] + * Data UE for MMIO store data + */ + (rNPU0FIR, bit(16)) ? self_th_1; + + /** NPU0FIR[17] + * spare + */ + (rNPU0FIR, bit(17)) ? defaultMaskedError; + + /** NPU0FIR[18] + * CQ CTL/SM NCF NVLink config error + */ + (rNPU0FIR, bit(18)) ? self_th_1; + + /** NPU0FIR[19] + * CQ CTL/SM NVF NVLink fatal error + */ + (rNPU0FIR, bit(19)) ? self_th_1; + + /** NPU0FIR[20] + * spare + */ + (rNPU0FIR, bit(20)) ? defaultMaskedError; + + /** NPU0FIR[21] + * CQ CTL/SM AUE Array UE + */ + (rNPU0FIR, bit(21)) ? self_th_1; + + /** NPU0FIR[22] + * CQ CTL/SM PBP PowerBus parity error + */ + (rNPU0FIR, bit(22)) ? self_th_1; + + /** NPU0FIR[23] + * CQ CTL/SM PBF PowerBus Fatal Error + */ + (rNPU0FIR, bit(23)) ? level2_M_self_L_th_1; + + /** NPU0FIR[24] + * PowerBus configuration error + */ + (rNPU0FIR, bit(24)) ? level2_M_self_L_th_1; + + /** NPU0FIR[25] + * CQ CTL/SM FWD Forward-Progress error + */ + (rNPU0FIR, bit(25)) ? self_th_1; + + /** NPU0FIR[26] + * CQ CTL/SM NLG NPU Logic error + */ + (rNPU0FIR, bit(26)) ? self_th_1; + + /** NPU0FIR[27] + * CQ CTL/SM UT=1 to frozen PE error + */ + (rNPU0FIR, bit(27)) ? defaultMaskedError; + + /** NPU0FIR[28] + * spare + */ + (rNPU0FIR, bit(28)) ? defaultMaskedError; + + /** NPU0FIR[29] + * CQ DAT ECC UE/SUE on data/BE arrays + */ + (rNPU0FIR, bit(29)) ? self_th_1; + + /** NPU0FIR[30] + * CQ DAT ECC CE on data/BE arrays + */ + (rNPU0FIR, bit(30)) ? self_M_level2_L_th_32perDay; + + /** NPU0FIR[31] + * CQ DAT parity error on data/BE latches + */ + (rNPU0FIR, bit(31)) ? self_th_1; + + /** NPU0FIR[32] + * CQ DAT parity errs on config regs + */ + (rNPU0FIR, bit(32)) ? self_th_1; + + /** NPU0FIR[33] + * CQ DAT parity errs/PowerBus rtag + */ + (rNPU0FIR, bit(33)) ? self_th_1; + + /** NPU0FIR[34] + * CQ DAT parity errs nternal state latches + */ + (rNPU0FIR, bit(34)) ? self_th_1; + + /** NPU0FIR[35] + * CQ DAT logic error + */ + (rNPU0FIR, bit(35)) ? self_th_1; + + /** NPU0FIR[36] + * Future SUE + */ + (rNPU0FIR, bit(36)) ? defaultMaskedError; + + /** NPU0FIR[37] + * ECC SUE on PB received data + */ + (rNPU0FIR, bit(37)) ? defaultMaskedError; + + /** NPU0FIR[38:39] + * spare + */ + (rNPU0FIR, bit(38|39)) ? defaultMaskedError; + + /** NPU0FIR[40] + * XTS internal logic error + */ + (rNPU0FIR, bit(40)) ? self_th_1; + + /** NPU0FIR[41] + * XTS correctable errs in XTS SRAM + */ + (rNPU0FIR, bit(41)) ? self_M_level2_L_th_32perDay; + + /** NPU0FIR[42] + * XTS Ues in XTS internal SRAM + */ + (rNPU0FIR, bit(42)) ? self_th_1; + + /** NPU0FIR[43] + * XTS CE on incoming stack transactions + */ + (rNPU0FIR, bit(43)) ? self_M_level2_L_th_32perDay; + + /** NPU0FIR[44] + * XTS errs incoming stack transaction + */ + (rNPU0FIR, bit(44)) ? self_th_1; + + /** NPU0FIR[45] + * XTS errs on incoming PBUS transaction + */ + (rNPU0FIR, bit(45)) ? self_th_1; + + /** NPU0FIR[46] + * XTS Translate Request Fail + */ + (rNPU0FIR, bit(46)) ? self_th_1; + + /** NPU0FIR[47:59] + * spare + */ + (rNPU0FIR, bit(47|48|49|50|51|52|53|54|55|56|57|58|59)) ? defaultMaskedError; + + /** NPU0FIR[60] + * MISC Pervasive SCOM satellite err + */ + (rNPU0FIR, bit(60)) ? defaultMaskedError; + + /** NPU0FIR[61] + * MISC Pervasive SCOM satellite err + */ + (rNPU0FIR, bit(61)) ? defaultMaskedError; + + /** NPU0FIR[62] + * Local FIR Parity Error RAS duplicate + */ + (rNPU0FIR, bit(62)) ? defaultMaskedError; + + /** NPU0FIR[63] + * Local FIR Parity Err + */ + (rNPU0FIR, bit(63)) ? defaultMaskedError; + +}; + +################################################################################ +# P9 chip NPU1FIR +################################################################################ + +rule rNPU1FIR +{ + CHECK_STOP: + NPU1FIR & ~NPU1FIR_MASK & ~NPU1FIR_ACT0 & ~NPU1FIR_ACT1; + RECOVERABLE: + NPU1FIR & ~NPU1FIR_MASK & ~NPU1FIR_ACT0 & NPU1FIR_ACT1; + UNIT_CS: + NPU1FIR & ~NPU1FIR_MASK & NPU1FIR_ACT0 & NPU1FIR_ACT1; +}; + +group gNPU1FIR filter singlebit, cs_root_cause +{ + /** NPU1FIR[0] + * NDL Brick0 stall + */ + (rNPU1FIR, bit(0)) ? self_th_1; + + /** NPU1FIR[1] + * NDL Brick0 nostall + */ + (rNPU1FIR, bit(1)) ? defaultMaskedError; + + /** NPU1FIR[2] + * NDL Brick1 stall + */ + (rNPU1FIR, bit(2)) ? self_th_1; + + /** NPU1FIR[3] + * NDL Brick1 nostall + */ + (rNPU1FIR, bit(3)) ? defaultMaskedError; + + /** NPU1FIR[4] + * NDL Brick2 stall + */ + (rNPU1FIR, bit(4)) ? self_th_1; + + /** NPU1FIR[5] + * NDL Brick2 nostall + */ + (rNPU1FIR, bit(5)) ? defaultMaskedError; + + /** NPU1FIR[6] + * NDL Brick3 stall + */ + (rNPU1FIR, bit(6)) ? self_th_1; + + /** NPU1FIR[7] + * NDL Brick3 nostall + */ + (rNPU1FIR, bit(7)) ? defaultMaskedError; + + /** NPU1FIR[8] + * NDL Brick4 stall + */ + (rNPU1FIR, bit(8)) ? self_th_1; + + /** NPU1FIR[9] + * NDL Brick4 nostall + */ + (rNPU1FIR, bit(9)) ? defaultMaskedError; + + /** NPU1FIR[10] + * NDL Brick5 stall + */ + (rNPU1FIR, bit(10)) ? self_th_1; + + /** NPU1FIR[11] + * NDL Brick5 nostall + */ + (rNPU1FIR, bit(11)) ? defaultMaskedError; + + /** NPU1FIR[12] + * MISC Register ring error (ie noack) + */ + (rNPU1FIR, bit(12)) ? self_th_32perDay; + + /** NPU1FIR[13] + * MISC Parity error from ibr addr regi + */ + (rNPU1FIR, bit(13)) ? self_th_1; + + /** NPU1FIR[14] + * MISC Parity error on SCOM D/A addr reg + */ + (rNPU1FIR, bit(14)) ? self_th_1; + + /** NPU1FIR[15] + * MISC Parity error on MISC Cntrl reg + */ + (rNPU1FIR, bit(15)) ? self_th_1; + + /** NPU1FIR[16] + * MISC NMMU signaled Local Checkstop + */ + (rNPU1FIR, bit(16)) ? defaultMaskedError; + + /** NPU1FIR[17] + * ATS Invalid TVT entry + */ + (rNPU1FIR, bit(17)) ? defaultMaskedError; + + /** NPU1FIR[18] + * ATS TVT Address range error + */ + (rNPU1FIR, bit(18)) ? defaultMaskedError; + + /** NPU1FIR[19] + * ATS TCE Page access error + */ + (rNPU1FIR, bit(19)) ? defaultMaskedError; + + /** NPU1FIR[20] + * ATS Effective Address hit multiple TCE + */ + (rNPU1FIR, bit(20)) ? self_th_1; + + /** NPU1FIR[21] + * ATS TCE Page access error + */ + (rNPU1FIR, bit(21)) ? defaultMaskedError; + + /** NPU1FIR[22] + * ATS Timeout on TCE tree walk + */ + (rNPU1FIR, bit(22)) ? self_th_1; + + /** NPU1FIR[23] + * ATS Parity error on TCE cache dir array + */ + (rNPU1FIR, bit(23)) ? self_th_32perDay; + + /** NPU1FIR[24] + * ATS Parity error on TCE cache data array + */ + (rNPU1FIR, bit(24)) ? self_th_32perDay; + + /** NPU1FIR[25] + * ATS ECC UE on Effective Address array + */ + (rNPU1FIR, bit(25)) ? self_th_1; + + /** NPU1FIR[26] + * ATS ECC CE on Effective Address array + */ + (rNPU1FIR, bit(26)) ? self_th_32perDay; + + /** NPU1FIR[27] + * ATS ECC UE on TDRmem array + */ + (rNPU1FIR, bit(27)) ? self_th_1; + + /** NPU1FIR[28] + * ATS ECC CE on TDRmem array + */ + (rNPU1FIR, bit(28)) ? self_th_1; + + /** NPU1FIR[29] + * ATS ECC UE on CQ CTL DMA Read + */ + (rNPU1FIR, bit(29)) ? self_th_1; + + /** NPU1FIR[30] + * ATS ECC CE on CQ CTL DMA Read + */ + (rNPU1FIR, bit(30)) ? self_th_32perDay; + + /** NPU1FIR[31] + * ATS Parity error on TVT entry + */ + (rNPU1FIR, bit(31)) ? self_th_1; + + /** NPU1FIR[32] + * ATS Parity err on IODA Address Reg + */ + (rNPU1FIR, bit(32)) ? self_th_1; + + /** NPU1FIR[33] + * ATS Parity error on ATS Control Register + */ + (rNPU1FIR, bit(33)) ? self_th_1; + + /** NPU1FIR[34] + * ATS Parity error on ATS reg + */ + (rNPU1FIR, bit(34)) ? self_th_1; + + /** NPU1FIR[35] + * ATS Invalid IODA Table Select entry + */ + (rNPU1FIR, bit(35)) ? self_th_1; + + /** NPU1FIR[36:61] + * Reserved + */ + (rNPU1FIR, bit(36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61)) ? defaultMaskedError; + + /** NPU1FIR[62] + * scom error + */ + (rNPU1FIR, bit(62)) ? defaultMaskedError; + + /** NPU1FIR[63] + * scom error + */ + (rNPU1FIR, bit(63)) ? defaultMaskedError; + +}; + +################################################################################ # XB Chiplet FIR ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/p9/p9_nimbus_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_nimbus_actions.rule index 189e4e193..899393c1e 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_nimbus_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_nimbus_actions.rule @@ -69,3 +69,13 @@ actionclass nx_th_32perDay threshold32pday; }; +/** Callout connected NX, threshold 1 */ +actionclass nx_th_1 +{ + callout(connected(TYPE_NX,0), MRU_MED); + threshold1; +}; + +/** Callout connected NX, threshold 1, SUE originated from somewhere else */ +actionclass nx_th_1_SUE { nx_th_1; SueSeen; }; + diff --git a/src/usr/diag/prdf/common/plat/p9/p9_phb.rule b/src/usr/diag/prdf/common/plat/p9/p9_phb.rule index 656665ad7..2e458a4c3 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_phb.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_phb.rule @@ -387,7 +387,7 @@ group gPCIFIR filter singlebit, cs_root_cause /** PCIFIR[0] * PBAIB register parity error */ - (rPCIFIR, bit(0)) ? self_th_1; + (rPCIFIR, bit(0)) ? externalAttention; /** PCIFIR[1] * Hardware error @@ -397,12 +397,12 @@ group gPCIFIR filter singlebit, cs_root_cause /** PCIFIR[2] * AIB interface error */ - (rPCIFIR, bit(2)) ? self_th_1; + (rPCIFIR, bit(2)) ? externalAttention; /** PCIFIR[3] * ETU reset error */ - (rPCIFIR, bit(3)) ? self_th_1; + (rPCIFIR, bit(3)) ? externalAttention; /** PCIFIR[4] * PEC scom error |